From 28ebe19e3124391ff455c1c42758ee8323229560 Mon Sep 17 00:00:00 2001 From: Gerrod Ubben Date: Tue, 2 Jun 2026 10:01:14 -0400 Subject: [PATCH 1/4] Readd manifest artifact backwards compatibility Reverts https://github.com/pulp/pulp_container/commit/ca2b2a59d8d3ca19f04c628a7a79939678c00935 --- pulp_container/app/downloaders.py | 8 +++- pulp_container/app/redirects.py | 42 ++++++++++++++++++ pulp_container/app/registry.py | 44 ++++++++++++++++++ pulp_container/app/tasks/sign.py | 22 +++++++-- pulp_container/app/tasks/sync_stages.py | 59 +++++++++++++++++++------ 5 files changed, 157 insertions(+), 18 deletions(-) diff --git a/pulp_container/app/downloaders.py b/pulp_container/app/downloaders.py index 907fc0958..9dae12021 100644 --- a/pulp_container/app/downloaders.py +++ b/pulp_container/app/downloaders.py @@ -10,6 +10,8 @@ from pulpcore.plugin.download import DownloaderFactory, HttpDownloader +from pulp_container.constants import V2_ACCEPT_HEADERS + log = getLogger(__name__) HeadResult = namedtuple( @@ -51,7 +53,11 @@ async def _run(self, handle_401=True, extra_data=None): handle_401(bool): If true, catch 401, request a new token and retry. """ - headers = {} + # manifests are header sensitive, blobs do not care + # these accept headers are going to be sent with every request to ensure downloader + # can download manifests, namely in the repair core task + # FIXME this can be rolledback after https://github.com/pulp/pulp_container/issues/1288 + headers = V2_ACCEPT_HEADERS repo_name = None if extra_data is not None: headers = extra_data.get("headers", headers) diff --git a/pulp_container/app/redirects.py b/pulp_container/app/redirects.py index 2ac800506..435430fa1 100644 --- a/pulp_container/app/redirects.py +++ b/pulp_container/app/redirects.py @@ -2,6 +2,7 @@ from django.conf import settings from django.core.exceptions import ObjectDoesNotExist +from django.http import Http404 from django.shortcuts import redirect from pulp_container.app.exceptions import ManifestNotFound @@ -101,6 +102,47 @@ def redirect_to_object_storage(self, artifact, return_media_type): ) return redirect(content_url) + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifests + def redirect_to_artifact(self, content_name, manifest, manifest_media_type): + """ + Search for the passed manifest's artifact and issue a redirect. + """ + try: + artifact = manifest._artifacts.get() + except ObjectDoesNotExist: + raise Http404(f"An artifact for '{content_name}' was not found") + + return self.redirect_to_object_storage(artifact, manifest_media_type) + + def issue_tag_redirect(self, tag): + """ + Issue a redirect if an accepted media type requires it or return not found if manifest + version is not supported. + """ + if tag.tagged_manifest.data: + return super().issue_tag_redirect(tag) + + manifest_media_type = tag.tagged_manifest.media_type + if manifest_media_type == MEDIA_TYPE.MANIFEST_V1: + return self.redirect_to_artifact( + tag.name, tag.tagged_manifest, MEDIA_TYPE.MANIFEST_V1_SIGNED + ) + elif manifest_media_type in get_accepted_media_types(self.request.headers): + return self.redirect_to_artifact(tag.name, tag.tagged_manifest, manifest_media_type) + else: + raise ManifestNotFound(reference=tag.name) + + def issue_manifest_redirect(self, manifest): + """ + Directly redirect to an associated manifest's artifact. + """ + if manifest.data: + return super().issue_manifest_redirect(manifest) + + return self.redirect_to_artifact(manifest.digest, manifest, manifest.media_type) + + # END OF BACKWARD COMPATIBILITY + class AzureStorageRedirects(S3StorageRedirects): """ diff --git a/pulp_container/app/registry.py b/pulp_container/app/registry.py index d78d13e93..30024bdc9 100644 --- a/pulp_container/app/registry.py +++ b/pulp_container/app/registry.py @@ -197,6 +197,10 @@ async def get_tag(self, request): "Content-Type": return_media_type, "Docker-Content-Digest": tag.tagged_manifest.digest, } + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + if not tag.tagged_manifest.data: + return await self.dispatch_tag(request, tag, response_headers) + # END OF BACKWARD COMPATIBILITY return web.Response(body=tag.tagged_manifest.data, headers=response_headers) # return what was found in case media_type is accepted header (docker, oci) @@ -206,11 +210,41 @@ async def get_tag(self, request): "Content-Type": return_media_type, "Docker-Content-Digest": tag.tagged_manifest.digest, } + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + if not tag.tagged_manifest.data: + return await self.dispatch_tag(request, tag, response_headers) + # END OF BACKWARD COMPATIBILITY return web.Response(body=tag.tagged_manifest.data, headers=response_headers) # return 404 in case the client is requesting docker manifest v2 schema 1 raise PathNotResolved(tag_name) + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + async def dispatch_tag(self, request, tag, response_headers): + """ + Finds an artifact associated with a Tag and sends it to the client, otherwise tries + to stream it. + + Args: + request(:class:`~aiohttp.web.Request`): The request to prepare a response for. + tag: Tag + response_headers (dict): dictionary that contains the 'Content-Type' header to send + with the response + + Returns: + :class:`aiohttp.web.StreamResponse` or :class:`aiohttp.web.FileResponse`: The response + streamed back to the client. + + """ + try: + artifact = await tag.tagged_manifest._artifacts.aget() + except ObjectDoesNotExist: + ca = await sync_to_async(lambda x: x[0])(tag.tagged_manifest.contentartifact_set.all()) + return await self._stream_content_artifact(request, web.StreamResponse(), ca) + else: + return await Registry._dispatch(artifact, response_headers) + # END OF BACKWARD COMPATIBILITY + @RegistryContentCache( base_key=lambda req, cac: Registry.find_base_path_cached(req, cac), auth=lambda req, cac, bk: Registry.auth_cached(req, cac, bk), @@ -247,6 +281,16 @@ async def get_by_digest(self, request): "Content-Type": manifest.media_type, "Docker-Content-Digest": manifest.digest, } + # TODO: BACKWARD COMPATIBILITY - remove after migrating to artifactless manifest + if not manifest.data: + if saved_artifact := await manifest._artifacts.afirst(): + return await Registry._dispatch(saved_artifact, headers) + else: + ca = await sync_to_async(lambda x: x[0])(manifest.contentartifact_set.all()) + return await self._stream_content_artifact( + request, web.StreamResponse(), ca + ) + # END OF BACKWARD COMPATIBILITY return web.Response(body=manifest.data, headers=headers) elif content_type == "blobs": ca = await ContentArtifact.objects.select_related("artifact", "content").aget( diff --git a/pulp_container/app/tasks/sign.py b/pulp_container/app/tasks/sign.py index 2875abbc2..1ebd58cb2 100644 --- a/pulp_container/app/tasks/sign.py +++ b/pulp_container/app/tasks/sign.py @@ -3,6 +3,7 @@ import hashlib from aiofiles import tempfile +from asgiref.sync import sync_to_async from django.conf import settings from django.db.models import Q @@ -101,10 +102,23 @@ async def create_signature(manifest, reference, signing_service): """ async with semaphore: # download and write file for object storage - async with tempfile.NamedTemporaryFile(dir=".", mode="wb", delete=False) as tf: - await tf.write(manifest.data.encode("utf-8")) - await tf.flush() - manifest_path = tf.name + if not manifest.data: + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + artifact = await manifest._artifacts.aget() + if settings.STORAGES["default"]["BACKEND"] != "pulpcore.app.models.storage.FileSystem": + async with tempfile.NamedTemporaryFile(dir=".", mode="wb", delete=False) as tf: + await tf.write(await sync_to_async(artifact.file.read)()) + await tf.flush() + artifact.file.close() + manifest_path = tf.name + else: + manifest_path = artifact.file.path + # END OF BACKWARD COMPATIBILITY + else: + async with tempfile.NamedTemporaryFile(dir=".", mode="wb", delete=False) as tf: + await tf.write(manifest.data.encode("utf-8")) + await tf.flush() + manifest_path = tf.name async with tempfile.NamedTemporaryFile(dir=".", prefix="signature") as tf: sig_path = tf.name diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 244b8ba52..4ced708b4 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -25,6 +25,7 @@ determine_media_type, extract_data_from_signature, filter_resources, + get_content_data, urlpath_sanitize, validate_manifest, ) @@ -83,11 +84,25 @@ async def _check_for_existing_manifest(self, download_tag): digest = response.headers.get("docker-content-digest") - if manifest := await Manifest.objects.filter( - digest=digest, pulp_domain=get_domain() - ).afirst(): - raw_text_data = manifest.data - content_data = json.loads(raw_text_data) + if ( + manifest := await Manifest.objects.prefetch_related("contentartifact_set") + .filter(digest=digest, pulp_domain=get_domain()) + .afirst() + ): + if raw_text_data := manifest.data: + content_data = json.loads(raw_text_data) + + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + elif saved_artifact := await manifest._artifacts.afirst(): + content_data, raw_bytes_data = await sync_to_async(get_content_data)(saved_artifact) + raw_text_data = raw_bytes_data.decode("utf-8") + # if artifact is not available (due to reclaim space) we will download it again + else: + content_data, raw_text_data, response = await self._download_manifest_data( + response.url + ) + # END OF BACKWARD COMPATIBILITY + else: content_data, raw_text_data, response = await self._download_manifest_data(response.url) @@ -364,7 +379,9 @@ async def get_paginated_tag_list(self, rel_link, repo_name): while True: link = urljoin(self.remote.url, rel_link) list_downloader = self.remote.get_downloader(url=link) - await list_downloader.run(extra_data={"repo_name": repo_name}) + # FIXME this can be rolledback after https://github.com/pulp/pulp_container/issues/1288 + # tags/list endpoint does not like any unnecessary headers to be sent + await list_downloader.run(extra_data={"repo_name": repo_name, "headers": {}}) with open(list_downloader.path) as tags_raw: tags_dict = json.loads(tags_raw.read()) tag_list.extend(tags_dict["tags"]) @@ -507,12 +524,26 @@ async def create_listed_manifest(self, manifest_data): ) manifest_url = urljoin(self.remote.url, relative_url) - if manifest := await Manifest.objects.filter( - digest=digest, pulp_domain=get_domain() - ).afirst(): - content_data = json.loads(manifest.data) - - content_data, manifest = await self._download_and_instantiate_manifest(manifest_url, digest) + if ( + manifest := await Manifest.objects.prefetch_related("contentartifact_set") + .filter(digest=digest, pulp_domain=get_domain()) + .afirst() + ): + if manifest.data: + content_data = json.loads(manifest.data) + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + elif saved_artifact := await manifest._artifacts.afirst(): + content_data, _ = await sync_to_async(get_content_data)(saved_artifact) + # if artifact is not available (due to reclaim space) we will download it again + else: + content_data, manifest = await self._download_and_instantiate_manifest( + manifest_url, digest + ) + # END OF BACKWARD COMPATIBILITY + else: + content_data, manifest = await self._download_and_instantiate_manifest( + manifest_url, digest + ) # in oci-index spec, platform is an optional field platform = manifest_data.get("platform", None) @@ -603,7 +634,9 @@ async def create_signatures(self, man_dc, signature_source): man_dc.content.digest, ) signatures_downloader = self.remote.get_downloader(url=signatures_url) - await signatures_downloader.run() + # FIXME this can be rolledback after https://github.com/pulp/pulp_container/issues/1288 + # signature extensions endpoint does not like any unnecessary headers to be sent + await signatures_downloader.run(extra_data={"headers": {}}) with open(signatures_downloader.path) as signatures_fd: api_extension_signatures = json.loads(signatures_fd.read()) for signature in api_extension_signatures.get("signatures", []): From b0ce4042f373e17db78ce12d3a2b08874dc19fe0 Mon Sep 17 00:00:00 2001 From: Gerrod Ubben Date: Tue, 2 Jun 2026 11:38:50 -0400 Subject: [PATCH 2/4] Add simple data repair during manifest sync --- pulp_container/app/tasks/sync_stages.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 4ced708b4..efd2be7fc 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -101,6 +101,9 @@ async def _check_for_existing_manifest(self, download_tag): content_data, raw_text_data, response = await self._download_manifest_data( response.url ) + if manifest.data is None: + manifest.data = raw_text_data + await manifest.asave(update_fields=["data"]) # END OF BACKWARD COMPATIBILITY else: @@ -533,12 +536,16 @@ async def create_listed_manifest(self, manifest_data): content_data = json.loads(manifest.data) # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest elif saved_artifact := await manifest._artifacts.afirst(): - content_data, _ = await sync_to_async(get_content_data)(saved_artifact) + content_data, raw_bytes_data = await sync_to_async(get_content_data)(saved_artifact) + manifest.data = raw_bytes_data.decode("utf-8") + await manifest.asave(update_fields=["data"]) # if artifact is not available (due to reclaim space) we will download it again else: - content_data, manifest = await self._download_and_instantiate_manifest( + content_data, new_manifest = await self._download_and_instantiate_manifest( manifest_url, digest ) + manifest.data = new_manifest.data + await manifest.asave(update_fields=["data"]) # END OF BACKWARD COMPATIBILITY else: content_data, manifest = await self._download_and_instantiate_manifest( From 6826bae1a4b56dbaf252f27db1810965e3f06ccf Mon Sep 17 00:00:00 2001 From: Gerrod Ubben Date: Tue, 2 Jun 2026 12:19:12 -0400 Subject: [PATCH 3/4] Update handle-image-data command to report truly broken manifests --- .../commands/container-handle-image-data.py | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/pulp_container/app/management/commands/container-handle-image-data.py b/pulp_container/app/management/commands/container-handle-image-data.py index 20869a41c..b7f679aa1 100644 --- a/pulp_container/app/management/commands/container-handle-image-data.py +++ b/pulp_container/app/management/commands/container-handle-image-data.py @@ -1,4 +1,4 @@ -from contextlib import suppress +from collections import defaultdict from gettext import gettext as _ from json.decoder import JSONDecodeError @@ -8,6 +8,7 @@ from django.db.models import Q from pulpcore.plugin.cache import SyncContentCache +from pulpcore.plugin.util import get_url from pulp_container.app.models import ContainerDistribution, Manifest from pulp_container.app.utils import get_content_data @@ -34,6 +35,7 @@ class Command(BaseCommand): def handle(self, *args, **options): manifests_updated_count = 0 + self._broken_manifests = [] manifests_v1 = Manifest.objects.filter( Q(media_type=MEDIA_TYPE.MANIFEST_V1), @@ -68,6 +70,23 @@ def handle(self, *args, **options): self.style.SUCCESS("Successfully updated %d manifests." % manifests_updated_count) ) + if self._broken_manifests: + self.stdout.write( + self.style.WARNING("Found %d broken manifests." % len(self._broken_manifests)) + ) + broken_by_repo = defaultdict(list) + for manifest in self._broken_manifests: + repos = manifest.repositories.all() + if repos: + for repo in repos: + broken_by_repo[get_url(repo)].append(get_url(manifest)) + else: + broken_by_repo["orphaned"].append(get_url(manifest)) + for repo_url, manifests in broken_by_repo.items(): + self.stdout.write(self.style.WARNING(" %s" % repo_url)) + for manifest_url in manifests: + self.stdout.write(self.style.WARNING(" %s" % manifest_url)) + if settings.CACHE_ENABLED and manifests_updated_count != 0: base_paths = ContainerDistribution.objects.values_list("base_path", flat=True) if base_paths: @@ -91,11 +110,13 @@ def update_manifests(self, manifests_qs): ] for manifest in manifests_qs.iterator(): - # suppress non-existing/already migrated artifacts and corrupted JSON files - with suppress(ObjectDoesNotExist, JSONDecodeError): + try: needs_update = self.init_manifest(manifest) - if needs_update: - manifests_to_update.append(manifest) + except (ObjectDoesNotExist, JSONDecodeError): + self._broken_manifests.append(manifest) + continue + if needs_update: + manifests_to_update.append(manifest) if len(manifests_to_update) > 1000: manifests_qs.model.objects.bulk_update( From a1dd20f4f5ed896e043f9ad448e01b91adfe80a0 Mon Sep 17 00:00:00 2001 From: Gerrod Ubben Date: Tue, 2 Jun 2026 13:49:17 -0400 Subject: [PATCH 4/4] Add manifest bugfix changelog --- CHANGES/+dataless_manifests.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/+dataless_manifests.bugfix diff --git a/CHANGES/+dataless_manifests.bugfix b/CHANGES/+dataless_manifests.bugfix new file mode 100644 index 000000000..0b09c50f9 --- /dev/null +++ b/CHANGES/+dataless_manifests.bugfix @@ -0,0 +1 @@ +Readded backwards compatibility for manifests that haven't been migrated to new data field. Sync will try to update them to the new format and repair manifests that are missing their artifact, but it is recommended that you run the `pulpcore-manager container-handle-image-data` command to fix all manifests at once. The command has been updated to report broken manifests by repository. Broken manifests in pushed repositories need to be deleted and re-pushed. \ No newline at end of file