NVIDIA-NeMo · soluwalana · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
@@ -32,6 +32,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     -e /app/sdk/python/nemo-platform \
     -e /app/packages/nemo_platform_plugin \
     -e /app/packages/nmp_common \
+    -e /app/packages/nmp_customization_common \
     -e /app/services/automodel
 
 # Re-pin nemo_automodel from the base clone without re-resolving transformers (already in base venv).

@@ -4,14 +4,18 @@
 # model_entity).
 #
 # Install steps:
-#   1. `uv pip install unsloth --torch-backend=auto`. This is unsloth's
-#      canonical install command (per their README). It pulls unsloth +
-#      unsloth_zoo + the entire HF stack (transformers, trl, peft, accelerate,
-#      datasets, bitsandbytes, xformers, etc.) at the versions unsloth's own
-#      pyproject.toml has constrained — including explicit !=X.Y.Z blocklists
-#      for known-broken transformers/trl releases. We deliberately don't
-#      second-guess these pins; they're tested upstream.
-#   1b. flash-attn — optional for unsloth and not installed by step 1. Without
+#   1. `uv pip install unsloth --torch-backend=auto` plus explicit
+#      `transformers==4.57.6` and `huggingface-hub==0.36.2` pins (transformers
+#      4.57.x requires hub <1.0; platform glue would otherwise pull hub 1.x).
+#      Unsloth's resolver still pulls unsloth_zoo and the rest of the HF stack
+#      (trl, peft, accelerate, datasets, bitsandbytes, xformers, etc.).
+#      `--overrides preserve_base_torch.txt` blocks uv from installing or
+#      upgrading torch in the venv, so the NGC base's PyTorch + CUDA remain
+#      the runtime stack.
+#   1b. bitsandbytes — compiled from source against NGC CUDA 13.1 (same pattern
+#       as docker/Dockerfile.nmp-automodel-base). PyPI wheels only ship through
+#       cuda130, so the source build replaces the wheel installed in step 1.
+#   1c. flash-attn — optional for unsloth and not installed by step 1. Without
 #       it Unsloth falls back when xformers is also missing (common on newer CUDA
 #       stacks), logging "FA2 = False / Xformers = None". Installed immediately
 #       after unsloth so pip does not re-resolve the HF stack.
@@ -39,6 +43,7 @@ ENV VIRTUAL_ENV=/opt/venv \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     OTEL_PYTHON_EXCLUDED_URLS="health"
 ENV PATH="/opt/venv/bin:/root/.local/bin:${PATH}"
+ENV UNSLOTH_SKIP_TORCHVISION_CHECK=1
 
 # --system-site-packages lets the venv inherit the NGC base's pre-built torch.
 # Without this, `uv pip install unsloth --torch-backend=auto` would download a
@@ -54,21 +59,53 @@ ARG USERNAME=ubuntu
 ARG USER_UID=1000
 ARG USER_GID=1000
 ARG UNSLOTH_VERSION=2026.6.1
+ARG TRANSFORMERS_VERSION=4.57.6
+ARG HF_HUB_VERSION=0.36.2
+ARG BITSANDBYTES_VERSION=0.49.1
+ARG BNB_MAX_JOBS=10
 
 WORKDIR /app
 
+COPY docker/unsloth/preserve_base_torch.txt /opt/docker/preserve_base_torch.txt
+COPY docker/unsloth/no_override_requirements.txt /opt/docker/no_override_requirements.txt
+
 RUN mkdir -p /home/${USERNAME}/.cache && \
     chown -R ${USER_UID}:${USER_GID} /home/${USERNAME}
 
-# Step 1: install unsloth via its own resolver. --torch-backend=auto tells uv
+# Step 1: install unsloth + pinned transformers. --torch-backend=auto tells uv
 # to detect the existing torch's CUDA build (from --system-site-packages
 # inheritance) and pick the matching xformers / bitsandbytes wheels.
+# preserve_base_torch.txt prevents uv from replacing the NGC torch stack.
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --python ${VIRTUAL_ENV}/bin/python --no-cache \
     --torch-backend=auto \
-    unsloth==${UNSLOTH_VERSION}
+    --overrides /opt/docker/preserve_base_torch.txt \
+    unsloth==${UNSLOTH_VERSION} \
+    transformers==${TRANSFORMERS_VERSION} \
+    huggingface-hub==${HF_HUB_VERSION}
+
+# Force-reinstall transformers and huggingface-hub at the versions given by the
+# TRANSFORMERS_VERSION / HF_HUB_VERSION build args. The preserve_base_torch.txt
+# overrides stay in effect so this step does not touch torch.
+# NOTE(review): step 1 above already installs these same pins in one uv
+# resolution — confirm this extra layer is still needed.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install \
+        --python ${VIRTUAL_ENV}/bin/python \
+        --no-cache \
+        --overrides /opt/docker/preserve_base_torch.txt \
+        --reinstall-package huggingface-hub \
+        --reinstall-package transformers \
+        huggingface-hub==${HF_HUB_VERSION} \
+        transformers==${TRANSFORMERS_VERSION}
 
-# TODO: Step 1b: Flash Attention 2 — compiled from source against the NGC 26.02 torch.
+# Step 1b: bitsandbytes from source — matches automodel base (CUDA 13.1 nvcc).
+# Only the requested ref is fetched (depth 1) instead of cloning the full
+# repository history; BITSANDBYTES_VERSION may be a tag, a branch, or a full
+# commit SHA. scikit-build-core is installed just for the
+# --no-build-isolation build and uninstalled again in the same RUN, and the
+# checkout is removed in the same layer so it never lands in the image.
+# NOTE(review): the Flash Attention TODO below states that /usr/local/cuda
+# symlinks to an older toolkit and recommends /usr/local/cuda-13.1 — confirm
+# the nvcc path used here really is the CUDA 13.1 compiler.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    git init --quiet /tmp/bitsandbytes && \
+    cd /tmp/bitsandbytes && \
+    git remote add origin https://github.com/bitsandbytes-foundation/bitsandbytes.git && \
+    git fetch --quiet --depth 1 origin "${BITSANDBYTES_VERSION}" && \
+    git checkout --quiet FETCH_HEAD && \
+    cmake \
+        -DCOMPUTE_CAPABILITY="75;80;86;87;89;90;100;103;110;120;121" \
+        -DCOMPUTE_BACKEND=cuda \
+        -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
+        -S . && \
+    make -j"${BNB_MAX_JOBS}" && \
+    uv pip install --python ${VIRTUAL_ENV}/bin/python --no-cache scikit-build-core --no-deps && \
+    uv pip install --python ${VIRTUAL_ENV}/bin/python --no-cache --no-build-isolation --no-deps --force-reinstall . && \
+    uv pip uninstall --python ${VIRTUAL_ENV}/bin/python scikit-build-core && \
+    rm -rf /tmp/bitsandbytes
+
+# TODO: Step 1c: Flash Attention 2 — compiled from source against the NGC 26.02 torch.
 # /usr/local/cuda symlinks to an older toolkit; use /usr/local/cuda-13.1 instead.
 # Cap parallel nvcc/ninja work — default uses all CPUs and OOMs typical build hosts.
 # Put flash attention back in when we have a working wheel in a separate image.
@@ -93,16 +130,27 @@ RUN chown -R ${USER_UID}:${USER_GID} /app
 # resolver to re-evaluate the whole HF stack.
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --python ${VIRTUAL_ENV}/bin/python --no-cache \
+    --overrides /opt/docker/no_override_requirements.txt \
     -e /app/sdk/python/nemo-platform \
     -e /app/packages/nemo_platform_plugin \
     -e /app/packages/nmp_common \
-    -e /app/services/unsloth
+    -e /app/packages/nmp_customization_common \
+    -e "/app/services/unsloth[integrations]"
 
-# Re-pin hf-transfer (used by HF_HUB_ENABLE_HF_TRANSFER above).
+# hf-transfer can pull huggingface-hub 1.x — install then re-pin hub + transformers.
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --python ${VIRTUAL_ENV}/bin/python --no-cache \
+    --overrides /opt/docker/preserve_base_torch.txt \
     "hf-transfer>=0.1.8,<0.2"
 
+# Final re-pin: force-reinstall huggingface-hub and transformers at the
+# HF_HUB_VERSION / TRANSFORMERS_VERSION build-arg versions, with the
+# preserve_base_torch.txt overrides still applied.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install \
+        --python ${VIRTUAL_ENV}/bin/python \
+        --no-cache \
+        --overrides /opt/docker/preserve_base_torch.txt \
+        --reinstall-package transformers \
+        --reinstall-package huggingface-hub \
+        transformers==${TRANSFORMERS_VERSION} \
+        huggingface-hub==${HF_HUB_VERSION}
+
 ENTRYPOINT ["/opt/venv/bin/python"]
 CMD ["-m", "nmp.unsloth.tasks.training", "--help"]
 

@@ -6,6 +6,9 @@
 FROM scratch AS platform-workspace
 # Use a reduced workspace file for this partial source tree.
 COPY docker/automodel/pyproject.workspace.toml pyproject.toml
+# uv --overrides file the training Dockerfile reads at /app/docker/automodel/.
+# (The docker/ tree moved out of services/automodel/, so it no longer rides in
+# via `COPY services/automodel`; copy it explicitly into the workspace slice.)
 COPY docker/automodel/no_override_requirements.txt docker/automodel/no_override_requirements.txt
 # nemo-platform-sdk hatch build force-includes docs/ from repo root.
 # docs/fern/openapi/openapi.yaml is a symlink to ../../../openapi/openapi.yaml.
@@ -14,6 +17,7 @@ COPY openapi openapi
 COPY packages/nmp_build_tools packages/nmp_build_tools
 COPY packages/models packages/models
 COPY packages/nmp_common packages/nmp_common
+COPY packages/nmp_customization_common packages/nmp_customization_common
 COPY packages/nemo_platform_plugin packages/nemo_platform_plugin
 COPY sdk/python/nemo-platform sdk/python/nemo-platform
 COPY services/automodel services/automodel

@@ -16,6 +16,7 @@ members = [
   "sdk/python/nemo-platform",
   "packages/nemo_platform_plugin",
   "packages/nmp_common",
+  "packages/nmp_customization_common",
   "services/automodel",
   "services/core/models",
 ]
@@ -26,5 +27,6 @@ models = { workspace = true }
 nemo-platform-sdk = { workspace = true }
 nemo-platform-plugin = { workspace = true }
 nmp-common = { workspace = true }
+nmp-customization-common = { workspace = true }
 nmp-automodel = { workspace = true }
 nmp-models = { workspace = true }
@@ -14,6 +14,7 @@ COPY docs docs
 COPY openapi openapi
 COPY packages/nmp_build_tools packages/nmp_build_tools
 COPY packages/nmp_common packages/nmp_common
+COPY packages/nmp_customization_common packages/nmp_customization_common
 COPY packages/nemo_platform_plugin packages/nemo_platform_plugin
 COPY sdk/python/nemo-platform sdk/python/nemo-platform
 COPY services/unsloth services/unsloth
@@ -49,12 +49,12 @@ docker buildx bake \
 The build pulls the NGC PyTorch base, then runs unsloth's canonical install
 in two steps:
 
-1. `uv pip install unsloth --torch-backend=auto` — this is the command
-   straight from unsloth's README. It pulls `unsloth`, `unsloth_zoo`, and the
-   full HF stack (transformers, trl, peft, accelerate, datasets, bitsandbytes,
-   xformers) at versions tested upstream — we deliberately don't pin any of
-   them on our end, because unsloth's pyproject already has precise
-   `!=X.Y.Z` blocklists for known-broken releases.
+1. `uv pip install unsloth --torch-backend=auto transformers==4.57.6 huggingface-hub==0.36.2` with
+   `preserve_base_torch.txt` overrides so the NGC base's PyTorch + CUDA are not
+   replaced. Unsloth's resolver still pulls `unsloth_zoo`, trl, peft,
+   accelerate, datasets, bitsandbytes, and xformers. **transformers is pinned
+   explicitly** to `4.57.6` (override at build time via
+   `--build-arg TRANSFORMERS_VERSION=...`).
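 1b. bitsandbytes (Dockerfile step 1b) — compiled from source against the NGC
     base's CUDA 13.1 toolkit; replaces the PyPI wheel installed in step 1.
 1c. Flash Attention 2 (Dockerfile step 1c) — source build with
     `--no-build-isolation` against the NGC base torch (cached Docker layer).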
     Parallelism is capped via `MAX_JOBS` (default `2`, override with bake arg
@@ -275,7 +275,8 @@ nemo models adapters retrieve qwen-unsloth-smoke-out \
 | `compile()` errors with "platform.runtime: docker" | Set `platform.runtime: docker` in `~/.nemo/config.yaml` and restart services. |
 | `compile()` errors with "Docker daemon unreachable" | Confirm `docker info` works as the user running `nemo services`. |
 | First job step errors with `Model 'X' has no fileset attached` | Attach a fileset to the model entity (`nemo models update --fileset ...`). |
-| `training` step errors with `bitsandbytes`/CUDA mismatch | Rebuild the image — the base NGC PyTorch tag may have moved. |
+| `training` step errors with `bitsandbytes`/CUDA mismatch (`libbitsandbytes_cuda131.so` not found) | Rebuild `nmp-unsloth-training` — the image compiles bitsandbytes from source against NGC CUDA 13.1 (same pattern as `nmp-automodel-base`). Override `BNB_MAX_JOBS` at build time if nvcc OOMs. |
+| `WandbCallback requires wandb to be installed` | Rebuild `nmp-unsloth-training` — the image installs `wandb` and `mlflow-skinny` for integrations. |
 | `training` step OOMs on a small GPU | Reduce `model.max_seq_length` and / or set `model.load_in_4bit: true`. |
 | `model-entity-creation` errors with "Adapter already exists" | Pick a fresh `output.name` (the unsloth compiler is "always create"; no overwrite). |
 | Step config not picked up (`NEMO_JOB_STEP_CONFIG_FILE_PATH is not set`) | The container was started outside the Jobs runner — only platform-driven submit populates this. |
@@ -300,13 +301,13 @@ nemo files filesets delete qwen-unsloth-smoke-out -w default
   separate ML stack. If you need both backends on the same cluster, run
   both images side by side; jobs from each backend route to their own
   `nmp-{backend}-training` image via env-var overrides.
-- **Why we don't pin transformers / trl / peft / bitsandbytes** — unsloth's
-  own pyproject already constrains them tightly (e.g.
-  `transformers>=4.51.3,!=4.52.0..3,!=4.53.0,!=4.54.0,!=4.55.0..1,!=4.57.0,
-  !=4.57.4..5,!=5.0.0,!=5.1.0,<=5.5.0`). Our `[unsloth]` extra in
-  `services/unsloth/pyproject.toml` is just `["unsloth[huggingface]"]` —
-  delegating everything to upstream so we don't ship our own subtly-wrong
-  constraints.
-- **No CUDA wheels are pre-built** — `bitsandbytes` ships PyPI wheels
-  (Ampere+; for older arches, swap to a source build or pin a compatible
-  release).
+- **transformers + huggingface-hub pins** — the training image pins `transformers==4.57.6`
+  and `huggingface-hub==0.36.2` in
+  `Dockerfile.nmp-unsloth-training` (compatible with unsloth's upstream
+  blocklists). Other HF deps (trl, peft, bitsandbytes, etc.) still come from
+  unsloth's resolver. **PyTorch + CUDA** stay on the NGC base stack via
+  `--system-site-packages` and `preserve_base_torch.txt` / `no_override_requirements.txt`
+  overrides (same impossible-marker pattern as automodel).
+- **bitsandbytes** — compiled from source in the image (v0.49.1, same approach as
+  `nmp-automodel-base`) because NGC 26.02 is CUDA 13.1 and PyPI only ships
+  prebuilt libs through cuda130.
@@ -0,0 +1,11 @@
+# uv overrides applied when adding platform glue: keep the NGC base torch/CUDA
+# stack and the HF stack pins from Dockerfile step 1 exactly as installed.
+# Each entry carries an impossible marker (sys_platform == 'never'), which
+# blocks uv from re-resolving that package. Entries are sorted alphabetically.
+accelerate; sys_platform == 'never'
+bitsandbytes; sys_platform == 'never'
+huggingface-hub; sys_platform == 'never'
+safetensors; sys_platform == 'never'
+tokenizers; sys_platform == 'never'
+torch; sys_platform == 'never'
+torchaudio; sys_platform == 'never'
+torchvision; sys_platform == 'never'
+transformers; sys_platform == 'never'
@@ -0,0 +1,7 @@
+# uv overrides that stop PyTorch wheels from being installed or upgraded in the
+# venv. torch + CUDA come from the NGC base image, which the venv inherits via
+# --system-site-packages. The impossible marker (sys_platform == 'never')
+# follows the same pattern as docker/automodel/no_override_requirements.txt.
+torch; sys_platform == 'never'
+torchaudio; sys_platform == 'never'
+torchvision; sys_platform == 'never'
@@ -15,6 +15,7 @@ members = [
   "sdk/python/nemo-platform",
   "packages/nemo_platform_plugin",
   "packages/nmp_common",
+  "packages/nmp_customization_common",
   "services/unsloth",
 ]
 
@@ -23,4 +24,5 @@ nmp-build-tools = { workspace = true }
 nemo-platform-sdk = { workspace = true }
 nemo-platform-plugin = { workspace = true }
 nmp-common = { workspace = true }
+nmp-customization-common = { workspace = true }
 nmp-unsloth = { workspace = true }
@@ -0,0 +1,12 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Shared experiment-tracking integration schemas for platform plugins."""
+
+from nemo_platform_plugin.integrations.schemas import IntegrationsSpec, MlflowIntegration, WandbIntegration
+
+__all__ = [
+    "IntegrationsSpec",
+    "MlflowIntegration",
+    "WandbIntegration",
+]