Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 43 additions & 18 deletions .github/scripts/write_release_bundle_metadata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Write release bundle metadata for downloaded SDK wheel artifacts."""
"""Write release bundle metadata for downloaded SDK wheel artifacts.

Container artifacts are metadata-only manifest entries: the image bits are
built and staged by Platform-Deploy from its dev registry at the bundle's
source SHA, so container entries carry no path or checksum.
"""

import argparse
import hashlib
Expand All @@ -22,10 +27,12 @@ class BundleMetadataError(Exception):
"""Raised when the release bundle metadata cannot be written safely."""


def safe_sdk_id(sdk_id: str) -> str:
    """Return ``sdk_id`` unchanged once it is confirmed to be a safe path segment.

    An id is accepted when it is non-empty, consists solely of ASCII letters,
    digits, ``.``, ``_`` and ``-``, and is not the special segment ``.`` or
    ``..``.

    Raises:
        BundleMetadataError: if ``sdk_id`` fails any of the checks above.
    """
    charset_ok = re.fullmatch(r"[A-Za-z0-9._-]+", sdk_id) is not None
    if charset_ok and sdk_id not in {".", ".."}:
        return sdk_id
    raise BundleMetadataError(f"selected SDK id must be a safe single path segment: {sdk_id}")
def safe_artifact_id(artifact_type: str, artifact_id: str) -> str:
    """Return ``artifact_id`` unchanged once it is confirmed to be a safe path segment.

    An id is accepted when it is non-empty, consists solely of ASCII letters,
    digits, ``.``, ``_`` and ``-``, and is not the special segment ``.`` or
    ``..``. ``artifact_type`` is used only to label the error message.

    Raises:
        BundleMetadataError: if ``artifact_id`` fails any of the checks above.
    """
    charset_ok = re.fullmatch(r"[A-Za-z0-9._-]+", artifact_id) is not None
    if charset_ok and artifact_id not in {".", ".."}:
        return artifact_id
    raise BundleMetadataError(
        f"selected {artifact_type} id must be a safe single path segment: {artifact_id}"
    )


def parse_release_date_json(value: str) -> str | None:
Expand All @@ -43,7 +50,8 @@ def artifact_ref(artifact_type: object, artifact_id: object) -> str:
return f"{artifact_type}:{artifact_id}"


def parse_selected_sdk_ids(value: str) -> list[str]:
def parse_selected_artifact_ids(value: str) -> dict[str, list[str]]:
"""Parse selected_artifacts_json into ids grouped by artifact type."""
try:
parsed = json.loads(value)
except json.JSONDecodeError as error:
Expand All @@ -52,29 +60,34 @@ def parse_selected_sdk_ids(value: str) -> list[str]:
if not isinstance(parsed, list) or not parsed:
raise BundleMetadataError("selected_artifacts_json must be a non-empty list")

sdk_ids: list[str] = []
seen: set[str] = set()
ids_by_type: dict[str, list[str]] = {"sdk": [], "container": []}
seen: dict[str, set[str]] = {artifact_type: set() for artifact_type in ids_by_type}
for artifact in parsed:
if not isinstance(artifact, dict):
raise BundleMetadataError("selected_artifacts_json entries must be objects")

artifact_type = artifact.get("type")
artifact_id = artifact.get("id")
if artifact_type != "sdk":
if artifact_type not in ids_by_type:
raise BundleMetadataError(
f"only SDK artifacts are supported in V1 bundles: {artifact_ref(artifact_type, artifact_id)}"
f"unsupported artifact type in bundle selection: {artifact_ref(artifact_type, artifact_id)}"
)
if not isinstance(artifact_id, str) or not artifact_id:
raise BundleMetadataError("selected SDK artifact id must be a non-empty string")
raise BundleMetadataError(f"selected {artifact_type} artifact id must be a non-empty string")

checked_id = safe_artifact_id(artifact_type, artifact_id)
if checked_id in seen[artifact_type]:
raise BundleMetadataError(
f"selected_artifacts_json contains duplicate {artifact_type} id: {checked_id}"
)

sdk_id = safe_sdk_id(artifact_id)
if sdk_id in seen:
raise BundleMetadataError(f"selected_artifacts_json contains duplicate SDK id: {sdk_id}")
seen[artifact_type].add(checked_id)
ids_by_type[artifact_type].append(checked_id)

seen.add(sdk_id)
sdk_ids.append(sdk_id)
if not ids_by_type["sdk"]:
raise BundleMetadataError("selected_artifacts_json must include at least one SDK artifact")

return sdk_ids
return ids_by_type


def find_sdk_wheel(sdk_artifacts_dir: Path, sdk_id: str, *, single_sdk_artifact: bool) -> Path:
Expand Down Expand Up @@ -167,7 +180,8 @@ def write_release_bundle_metadata(
if not source_sha:
raise BundleMetadataError("source_sha is required")

sdk_ids = parse_selected_sdk_ids(selected_artifacts_json)
ids_by_type = parse_selected_artifact_ids(selected_artifacts_json)
sdk_ids = ids_by_type["sdk"]
release_date = parse_release_date_json(release_date_json)
wheels_dir = prepare_bundle_dir(bundle_dir)

Expand All @@ -189,6 +203,17 @@ def write_release_bundle_metadata(
}
)

# Container artifacts are metadata-only: the consumer stages the images
# from its dev registry by source_sha and tags them with release_label.
for container_id in ids_by_type["container"]:
artifacts.append(
{
"type": "container",
"id": container_id,
"version": release_label,
}
)

manifest: dict[str, object] = {
"cadence": cadence,
"release_label": release_label,
Expand Down
19 changes: 18 additions & 1 deletion .github/workflows/release-bundle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,24 @@ jobs:
exit 1
fi

# Containers ride along on every release regardless of release_scope
# (scope governs SDK selection only): the container catalog is the
# eligibility list, and image bits are not built here. Entries are
# recorded in the manifest as typed artifacts; Platform-Deploy stages
# the images from its dev registry by this release's source SHA.
container_catalog="$(yq -o=json '.container // []' "${catalog}" | jq -c 'map(.id)')"
if ! jq -e 'type == "array" and all(.[]; type == "string" and length > 0)' <<<"${container_catalog}" >/dev/null; then
echo "::error::release/assets.yaml container must contain non-empty string id values"
exit 1
fi
if ! jq -e 'length == (unique | length)' <<<"${container_catalog}" >/dev/null; then
echo "::error::release/assets.yaml container contains duplicate ids"
exit 1
fi

sdk_artifacts_json="$(jq -nc --argjson ids "${selected_sdk_ids}" '$ids | map({type: "sdk", id: .})')"
selected_artifacts_json="${sdk_artifacts_json}"
container_artifacts_json="$(jq -nc --argjson ids "${container_catalog}" '$ids | map({type: "container", id: .})')"
selected_artifacts_json="$(jq -nc --argjson sdks "${sdk_artifacts_json}" --argjson containers "${container_artifacts_json}" '$sdks + $containers')"
sdk_matrix="$(jq -nc --argjson artifacts "${sdk_artifacts_json}" '{include: $artifacts}')"
sdk_count="$(jq -r 'length' <<<"${selected_sdk_ids}")"

Expand All @@ -314,6 +330,7 @@ jobs:
} >>"${GITHUB_OUTPUT}"

echo "Planned SDK release artifacts: $(jq -r 'join(", ")' <<<"${selected_sdk_ids}")"
echo "Planned container release artifacts: $(jq -r 'join(", ")' <<<"${container_catalog}")"

reserve-release-tag:
name: Reserve release tag
Expand Down
22 changes: 22 additions & 0 deletions RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,28 @@ Also check:

---

## Container image eligibility

The `container:` list in `release/assets.yaml` declares which container
images are eligible for release publishing. Containers ride along on every
release regardless of `release_scope` (scope governs SDK selection only):
the bundle workflow records them as `container`-typed entries in
`release-manifest.json`, and Platform-Deploy's `Release Deploy Artifacts`
workflow stages the images automatically after the SDK publish, reading
this list from this repository at the release ref. Eligibility is therefore
version-pinned: re-staging an old tag publishes the container set that was
declared at that commit. `Release Promote Public` (public NGC) reads the
same list at the release tag and stays a manual step.

Adding an image here also requires a catalog metadata entry (overview,
labels) in Platform-Deploy `release/nemo-assets-config.yaml`. Images are
built into the dev registry tagged with this repository's commit SHA on
every merge to main (Platform-Deploy's `docker-automodel.yaml` via the
ci-passed dispatch); release SHAs that predate that trigger need a manual
`docker-automodel.yaml` dispatch first.

---

## Nightly builds

Nightly builds run automatically at 20:00 PT and publish to `pypi.nvidia.com`. They use the HEAD of `main` and version strings like `0.1.3.dev20260101120000`. No action required from the team.
Expand Down
12 changes: 12 additions & 0 deletions release/assets.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
sdk:
- id: nemo-platform
- id: nemo-platform-plugin

# Container images eligible for release publishing. This list is the single
# source of truth: Platform-Deploy's Release Deploy Artifacts and Release
# Promote Public workflows read it from this repository at the release ref.
# Each id must match an image name pushed to the dev registry tagged by this
# repository's commit SHA, and should have a catalog metadata entry in
# Platform-Deploy release/nemo-assets-config.yaml.
container:
- id: nmp-automodel-base
- id: nmp-automodel-tasks
- id: nmp-automodel-training
- id: nmp-unsloth-training
83 changes: 80 additions & 3 deletions tests/unit/release/test_write_release_bundle_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,8 @@ def test_multiple_directly_downloaded_wheels_fail_clearly(tmp_path: Path):
)


def test_non_sdk_artifact_type_fails_clearly(tmp_path: Path):
with pytest.raises(BundleMetadataError, match="only SDK artifacts are supported"):
def test_unsupported_artifact_type_fails_clearly(tmp_path: Path):
with pytest.raises(BundleMetadataError, match="unsupported artifact type"):
bundle_metadata.write_release_bundle_metadata(
sdk_artifacts_dir=tmp_path / "downloaded-artifacts",
bundle_dir=tmp_path / "release-bundle",
Expand All @@ -294,8 +294,85 @@ def test_non_sdk_artifact_type_fails_clearly(tmp_path: Path):
)


def test_container_artifacts_become_metadata_only_entries(tmp_path: Path):
    """Selected containers appear in the manifest as type/id/version entries only.

    The manifest entries after the first artifact must be exactly the two
    selected containers, versioned with the release label, and the checksum
    listing must cover nothing beyond the manifest and the SDK wheel.
    """
    downloads = tmp_path / "downloaded-artifacts"
    bundle = tmp_path / "release-bundle"
    write_wheel(downloads, "nemo-platform")
    selection = selected_artifacts(
        {"type": "sdk", "id": "nemo-platform"},
        {"type": "container", "id": "nmp-automodel-tasks"},
        {"type": "container", "id": "nmp-unsloth-training"},
    )

    bundle_metadata.write_release_bundle_metadata(
        sdk_artifacts_dir=downloads,
        bundle_dir=bundle,
        selected_artifacts_json=selection,
        cadence="rc",
        release_label="1.0.0-rc1",
        release_date_json="null",
        source_sha="c" * 40,
    )

    expected_containers = [
        {"type": "container", "id": container_id, "version": "1.0.0-rc1"}
        for container_id in ("nmp-automodel-tasks", "nmp-unsloth-training")
    ]
    manifest_artifacts = read_manifest(bundle)["artifacts"]
    assert manifest_artifacts[1:] == expected_containers  # type: ignore[index]

    # Container entries are metadata-only: no path, and nothing extra in checksums.
    checksummed_paths = set(parse_checksums(bundle))
    assert checksummed_paths == {
        "release-manifest.json",
        "wheels/nemo_platform-1.0.0-py3-none-any.whl",
    }


def test_container_only_selection_fails_clearly(tmp_path: Path):
    """A selection holding only a container is rejected for lacking an SDK artifact."""
    fixed_inputs = {
        "sdk_artifacts_dir": tmp_path / "downloaded-artifacts",
        "bundle_dir": tmp_path / "release-bundle",
        "cadence": "release",
        "release_label": "1.0.0",
        "release_date_json": "null",
        "source_sha": "a" * 40,
    }

    with pytest.raises(BundleMetadataError, match="at least one SDK artifact"):
        bundle_metadata.write_release_bundle_metadata(
            selected_artifacts_json=selected_artifacts(
                {"type": "container", "id": "nmp-automodel-tasks"}
            ),
            **fixed_inputs,
        )


def test_duplicate_container_ids_fail_clearly(tmp_path: Path):
    """Listing the same container id twice is rejected and the id is named in the error."""
    fixed_inputs = {
        "sdk_artifacts_dir": tmp_path / "downloaded-artifacts",
        "bundle_dir": tmp_path / "release-bundle",
        "cadence": "release",
        "release_label": "1.0.0",
        "release_date_json": "null",
        "source_sha": "a" * 40,
    }

    with pytest.raises(BundleMetadataError, match="duplicate container id: nmp-automodel-tasks"):
        bundle_metadata.write_release_bundle_metadata(
            selected_artifacts_json=selected_artifacts(
                {"type": "sdk", "id": "nemo-platform"},
                {"type": "container", "id": "nmp-automodel-tasks"},
                {"type": "container", "id": "nmp-automodel-tasks"},
            ),
            **fixed_inputs,
        )


def test_unsafe_container_id_fails_clearly(tmp_path: Path):
    """A container id containing a path traversal (``../evil``) is rejected."""
    fixed_inputs = {
        "sdk_artifacts_dir": tmp_path / "downloaded-artifacts",
        "bundle_dir": tmp_path / "release-bundle",
        "cadence": "release",
        "release_label": "1.0.0",
        "release_date_json": "null",
        "source_sha": "a" * 40,
    }

    with pytest.raises(BundleMetadataError, match="container id must be a safe single path segment"):
        bundle_metadata.write_release_bundle_metadata(
            selected_artifacts_json=selected_artifacts(
                {"type": "sdk", "id": "nemo-platform"},
                {"type": "container", "id": "../evil"},
            ),
            **fixed_inputs,
        )


def test_duplicate_selected_sdk_ids_fail_clearly(tmp_path: Path):
with pytest.raises(BundleMetadataError, match="duplicate SDK id: nemo-platform"):
with pytest.raises(BundleMetadataError, match="duplicate sdk id: nemo-platform"):
bundle_metadata.write_release_bundle_metadata(
sdk_artifacts_dir=tmp_path / "downloaded-artifacts",
bundle_dir=tmp_path / "release-bundle",
Expand Down
Loading