From 60256ff0d4ea192639fd9a629cdf97fd1eb3b02d Mon Sep 17 00:00:00 2001 From: abondarenko Date: Fri, 12 Jun 2026 16:58:28 -0400 Subject: [PATCH 1/2] feat(release): declare publishable container images in release/assets.yaml The container list in release/assets.yaml is the single source of truth for which container images are eligible for release publishing. Platform-Deploy's Release Deploy Artifacts and Release Promote Public workflows read it from this repository at the release ref, so eligibility is version-pinned to the released commit. Declares the four Automodel images. Linear: AIREINF-216 Co-Authored-By: Claude Fable 5 Signed-off-by: abondarenko --- RELEASING.md | 15 +++++++++++++++ release/assets.yaml | 12 ++++++++++++ 2 files changed, 27 insertions(+) diff --git a/RELEASING.md b/RELEASING.md index bbba6242f5..d01ddccfa0 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -89,6 +89,21 @@ Also check: --- +## Container image eligibility + +The `container:` list in `release/assets.yaml` declares which container +images are eligible for release publishing. Platform-Deploy's +`Release Deploy Artifacts` (staging) and `Release Promote Public` (public +NGC) workflows read this list from this repository at the release ref, so +eligibility is version-pinned: re-staging an old tag publishes the container +set that was declared at that commit. Adding an image here also requires a +catalog metadata entry (overview, labels) in Platform-Deploy +`release/nemo-assets-config.yaml`, and the image must be pushed to the dev +registry tagged with this repository's commit SHA (the Automodel images are +built by Platform-Deploy's `docker-automodel.yaml`). + +--- + ## Nightly builds Nightly builds run automatically at 20:00 PT and publish to `pypi.nvidia.com`. They use the HEAD of `main` and version strings like `0.1.3.dev20260101120000`. No action required from the team. 
diff --git a/release/assets.yaml b/release/assets.yaml index 62ad4738f4..ff43d59a93 100644 --- a/release/assets.yaml +++ b/release/assets.yaml @@ -1,3 +1,15 @@ sdk: - id: nemo-platform - id: nemo-platform-plugin + +# Container images eligible for release publishing. This list is the single +# source of truth: Platform-Deploy's Release Deploy Artifacts and Release +# Promote Public workflows read it from this repository at the release ref. +# Each id must match an image name pushed to the dev registry tagged with this +# repository's commit SHA, and should have a catalog metadata entry in +# Platform-Deploy release/nemo-assets-config.yaml. +container: + - id: nmp-automodel-base + - id: nmp-automodel-tasks + - id: nmp-automodel-training + - id: nmp-unsloth-training From 73b11cf01734f4ebaea5f9e9fa28402d8d0eaa21 Mon Sep 17 00:00:00 2001 From: abondarenko Date: Fri, 12 Jun 2026 17:25:56 -0400 Subject: [PATCH 2/2] feat(release): include container artifacts in the release bundle manifest plan-assets reads the container list from release/assets.yaml (validated for non-empty, unique ids) and appends container-typed entries to the selected artifacts. The bundle metadata writer records them in release-manifest.json as metadata-only entries (type, id, version = release label; no path or checksum, since image bits are staged by Platform-Deploy from its dev registry at the bundle's source SHA). Containers ride along on every release regardless of release_scope. 
Linear: AIREINF-216 Co-Authored-By: Claude Fable 5 Signed-off-by: abondarenko --- .../scripts/write_release_bundle_metadata.py | 61 ++++++++++---- .github/workflows/release-bundle.yaml | 19 ++++- RELEASING.md | 25 ++++-- .../test_write_release_bundle_metadata.py | 83 ++++++++++++++++++- 4 files changed, 157 insertions(+), 31 deletions(-) diff --git a/.github/scripts/write_release_bundle_metadata.py b/.github/scripts/write_release_bundle_metadata.py index 92088aab69..76e0d80003 100644 --- a/.github/scripts/write_release_bundle_metadata.py +++ b/.github/scripts/write_release_bundle_metadata.py @@ -1,7 +1,12 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -"""Write release bundle metadata for downloaded SDK wheel artifacts.""" +"""Write release bundle metadata for downloaded SDK wheel artifacts. + +Container artifacts are metadata-only manifest entries: the image bits are +built and staged by Platform-Deploy from its dev registry at the bundle's +source SHA, so container entries carry no path or checksum. 
+""" import argparse import hashlib @@ -22,10 +27,12 @@ class BundleMetadataError(Exception): """Raised when the release bundle metadata cannot be written safely.""" -def safe_sdk_id(sdk_id: str) -> str: - if not re.fullmatch(r"[A-Za-z0-9._-]+", sdk_id) or sdk_id in {".", ".."}: - raise BundleMetadataError(f"selected SDK id must be a safe single path segment: {sdk_id}") - return sdk_id +def safe_artifact_id(artifact_type: str, artifact_id: str) -> str: + if not re.fullmatch(r"[A-Za-z0-9._-]+", artifact_id) or artifact_id in {".", ".."}: + raise BundleMetadataError( + f"selected {artifact_type} id must be a safe single path segment: {artifact_id}" + ) + return artifact_id def parse_release_date_json(value: str) -> str | None: @@ -43,7 +50,8 @@ def artifact_ref(artifact_type: object, artifact_id: object) -> str: return f"{artifact_type}:{artifact_id}" -def parse_selected_sdk_ids(value: str) -> list[str]: +def parse_selected_artifact_ids(value: str) -> dict[str, list[str]]: + """Parse selected_artifacts_json into ids grouped by artifact type.""" try: parsed = json.loads(value) except json.JSONDecodeError as error: @@ -52,29 +60,34 @@ def parse_selected_sdk_ids(value: str) -> list[str]: if not isinstance(parsed, list) or not parsed: raise BundleMetadataError("selected_artifacts_json must be a non-empty list") - sdk_ids: list[str] = [] - seen: set[str] = set() + ids_by_type: dict[str, list[str]] = {"sdk": [], "container": []} + seen: dict[str, set[str]] = {artifact_type: set() for artifact_type in ids_by_type} for artifact in parsed: if not isinstance(artifact, dict): raise BundleMetadataError("selected_artifacts_json entries must be objects") artifact_type = artifact.get("type") artifact_id = artifact.get("id") - if artifact_type != "sdk": + if artifact_type not in ids_by_type: raise BundleMetadataError( - f"only SDK artifacts are supported in V1 bundles: {artifact_ref(artifact_type, artifact_id)}" + f"unsupported artifact type in bundle selection: 
{artifact_ref(artifact_type, artifact_id)}" ) if not isinstance(artifact_id, str) or not artifact_id: - raise BundleMetadataError("selected SDK artifact id must be a non-empty string") + raise BundleMetadataError(f"selected {artifact_type} artifact id must be a non-empty string") + + checked_id = safe_artifact_id(artifact_type, artifact_id) + if checked_id in seen[artifact_type]: + raise BundleMetadataError( + f"selected_artifacts_json contains duplicate {artifact_type} id: {checked_id}" + ) - sdk_id = safe_sdk_id(artifact_id) - if sdk_id in seen: - raise BundleMetadataError(f"selected_artifacts_json contains duplicate SDK id: {sdk_id}") + seen[artifact_type].add(checked_id) + ids_by_type[artifact_type].append(checked_id) - seen.add(sdk_id) - sdk_ids.append(sdk_id) + if not ids_by_type["sdk"]: + raise BundleMetadataError("selected_artifacts_json must include at least one SDK artifact") - return sdk_ids + return ids_by_type def find_sdk_wheel(sdk_artifacts_dir: Path, sdk_id: str, *, single_sdk_artifact: bool) -> Path: @@ -167,7 +180,8 @@ def write_release_bundle_metadata( if not source_sha: raise BundleMetadataError("source_sha is required") - sdk_ids = parse_selected_sdk_ids(selected_artifacts_json) + ids_by_type = parse_selected_artifact_ids(selected_artifacts_json) + sdk_ids = ids_by_type["sdk"] release_date = parse_release_date_json(release_date_json) wheels_dir = prepare_bundle_dir(bundle_dir) @@ -189,6 +203,17 @@ def write_release_bundle_metadata( } ) + # Container artifacts are metadata-only: the consumer stages the images + # from its dev registry by source_sha and tags them with release_label. 
+ for container_id in ids_by_type["container"]: + artifacts.append( + { + "type": "container", + "id": container_id, + "version": release_label, + } + ) + manifest: dict[str, object] = { "cadence": cadence, "release_label": release_label, diff --git a/.github/workflows/release-bundle.yaml b/.github/workflows/release-bundle.yaml index 4aa987034f..0b483a3c88 100644 --- a/.github/workflows/release-bundle.yaml +++ b/.github/workflows/release-bundle.yaml @@ -302,8 +302,24 @@ jobs: exit 1 fi + # Containers ride along on every release regardless of release_scope + # (scope governs SDK selection only): the container catalog is the + # eligibility list, and image bits are not built here. Entries are + # recorded in the manifest as typed artifacts; Platform-Deploy stages + # the images from its dev registry by this release's source SHA. + container_catalog="$(yq -o=json '.container // []' "${catalog}" | jq -c 'map(.id)')" + if ! jq -e 'type == "array" and all(.[]; type == "string" and length > 0)' <<<"${container_catalog}" >/dev/null; then + echo "::error::release/assets.yaml container must contain non-empty string id values" + exit 1 + fi + if ! 
jq -e 'length == (unique | length)' <<<"${container_catalog}" >/dev/null; then + echo "::error::release/assets.yaml container contains duplicate ids" + exit 1 + fi + sdk_artifacts_json="$(jq -nc --argjson ids "${selected_sdk_ids}" '$ids | map({type: "sdk", id: .})')" - selected_artifacts_json="${sdk_artifacts_json}" + container_artifacts_json="$(jq -nc --argjson ids "${container_catalog}" '$ids | map({type: "container", id: .})')" + selected_artifacts_json="$(jq -nc --argjson sdks "${sdk_artifacts_json}" --argjson containers "${container_artifacts_json}" '$sdks + $containers')" sdk_matrix="$(jq -nc --argjson artifacts "${sdk_artifacts_json}" '{include: $artifacts}')" sdk_count="$(jq -r 'length' <<<"${selected_sdk_ids}")" @@ -314,6 +330,7 @@ jobs: } >>"${GITHUB_OUTPUT}" echo "Planned SDK release artifacts: $(jq -r 'join(", ")' <<<"${selected_sdk_ids}")" + echo "Planned container release artifacts: $(jq -r 'join(", ")' <<<"${container_catalog}")" reserve-release-tag: name: Reserve release tag diff --git a/RELEASING.md b/RELEASING.md index d01ddccfa0..3df9a14ab7 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -92,15 +92,22 @@ Also check: ## Container image eligibility The `container:` list in `release/assets.yaml` declares which container -images are eligible for release publishing. Platform-Deploy's -`Release Deploy Artifacts` (staging) and `Release Promote Public` (public -NGC) workflows read this list from this repository at the release ref, so -eligibility is version-pinned: re-staging an old tag publishes the container -set that was declared at that commit. Adding an image here also requires a -catalog metadata entry (overview, labels) in Platform-Deploy -`release/nemo-assets-config.yaml`, and the image must be pushed to the dev -registry tagged with this repository's commit SHA (the Automodel images are -built by Platform-Deploy's `docker-automodel.yaml`). +images are eligible for release publishing. 
Containers ride along on every +release regardless of `release_scope` (scope governs SDK selection only): +the bundle workflow records them as `container`-typed entries in +`release-manifest.json`, and Platform-Deploy's `Release Deploy Artifacts` +workflow stages the images automatically after the SDK publish, reading +this list from this repository at the release ref. Eligibility is therefore +version-pinned: re-staging an old tag publishes the container set that was +declared at that commit. `Release Promote Public` (public NGC) reads the +same list at the release tag and stays a manual step. + +Adding an image here also requires a catalog metadata entry (overview, +labels) in Platform-Deploy `release/nemo-assets-config.yaml`. Images are +built into the dev registry tagged with this repository's commit SHA on +every merge to main (Platform-Deploy's `docker-automodel.yaml` via the +ci-passed dispatch); release SHAs that predate that trigger need a manual +`docker-automodel.yaml` dispatch first. 
--- diff --git a/tests/unit/release/test_write_release_bundle_metadata.py b/tests/unit/release/test_write_release_bundle_metadata.py index d50de738d3..e185f5944f 100644 --- a/tests/unit/release/test_write_release_bundle_metadata.py +++ b/tests/unit/release/test_write_release_bundle_metadata.py @@ -281,8 +281,8 @@ def test_multiple_directly_downloaded_wheels_fail_clearly(tmp_path: Path): ) -def test_non_sdk_artifact_type_fails_clearly(tmp_path: Path): - with pytest.raises(BundleMetadataError, match="only SDK artifacts are supported"): +def test_unsupported_artifact_type_fails_clearly(tmp_path: Path): + with pytest.raises(BundleMetadataError, match="unsupported artifact type"): bundle_metadata.write_release_bundle_metadata( sdk_artifacts_dir=tmp_path / "downloaded-artifacts", bundle_dir=tmp_path / "release-bundle", @@ -294,8 +294,85 @@ def test_non_sdk_artifact_type_fails_clearly(tmp_path: Path): ) +def test_container_artifacts_become_metadata_only_entries(tmp_path: Path): + sdk_artifacts_dir = tmp_path / "downloaded-artifacts" + bundle_dir = tmp_path / "release-bundle" + write_wheel(sdk_artifacts_dir, "nemo-platform") + + bundle_metadata.write_release_bundle_metadata( + sdk_artifacts_dir=sdk_artifacts_dir, + bundle_dir=bundle_dir, + selected_artifacts_json=selected_artifacts( + {"type": "sdk", "id": "nemo-platform"}, + {"type": "container", "id": "nmp-automodel-tasks"}, + {"type": "container", "id": "nmp-unsloth-training"}, + ), + cadence="rc", + release_label="1.0.0-rc1", + release_date_json="null", + source_sha="c" * 40, + ) + + artifacts = read_manifest(bundle_dir)["artifacts"] + assert artifacts[1:] == [ # type: ignore[index] + {"type": "container", "id": "nmp-automodel-tasks", "version": "1.0.0-rc1"}, + {"type": "container", "id": "nmp-unsloth-training", "version": "1.0.0-rc1"}, + ] + # Container entries are metadata-only: no path, and nothing extra in checksums. 
+ assert set(parse_checksums(bundle_dir)) == { + "release-manifest.json", + "wheels/nemo_platform-1.0.0-py3-none-any.whl", + } + + +def test_container_only_selection_fails_clearly(tmp_path: Path): + with pytest.raises(BundleMetadataError, match="at least one SDK artifact"): + bundle_metadata.write_release_bundle_metadata( + sdk_artifacts_dir=tmp_path / "downloaded-artifacts", + bundle_dir=tmp_path / "release-bundle", + selected_artifacts_json=selected_artifacts({"type": "container", "id": "nmp-automodel-tasks"}), + cadence="release", + release_label="1.0.0", + release_date_json="null", + source_sha="a" * 40, + ) + + +def test_duplicate_container_ids_fail_clearly(tmp_path: Path): + with pytest.raises(BundleMetadataError, match="duplicate container id: nmp-automodel-tasks"): + bundle_metadata.write_release_bundle_metadata( + sdk_artifacts_dir=tmp_path / "downloaded-artifacts", + bundle_dir=tmp_path / "release-bundle", + selected_artifacts_json=selected_artifacts( + {"type": "sdk", "id": "nemo-platform"}, + {"type": "container", "id": "nmp-automodel-tasks"}, + {"type": "container", "id": "nmp-automodel-tasks"}, + ), + cadence="release", + release_label="1.0.0", + release_date_json="null", + source_sha="a" * 40, + ) + + +def test_unsafe_container_id_fails_clearly(tmp_path: Path): + with pytest.raises(BundleMetadataError, match="container id must be a safe single path segment"): + bundle_metadata.write_release_bundle_metadata( + sdk_artifacts_dir=tmp_path / "downloaded-artifacts", + bundle_dir=tmp_path / "release-bundle", + selected_artifacts_json=selected_artifacts( + {"type": "sdk", "id": "nemo-platform"}, + {"type": "container", "id": "../evil"}, + ), + cadence="release", + release_label="1.0.0", + release_date_json="null", + source_sha="a" * 40, + ) + + def test_duplicate_selected_sdk_ids_fail_clearly(tmp_path: Path): - with pytest.raises(BundleMetadataError, match="duplicate SDK id: nemo-platform"): + with pytest.raises(BundleMetadataError, match="duplicate sdk id: 
nemo-platform"): bundle_metadata.write_release_bundle_metadata( sdk_artifacts_dir=tmp_path / "downloaded-artifacts", bundle_dir=tmp_path / "release-bundle",