From a2a3386f268996a784bc727e3c14d9c27dd0144f Mon Sep 17 00:00:00 2001 From: Matthias Dellweg Date: Tue, 2 Jun 2026 16:28:54 +0200 Subject: [PATCH] Add resilient repair for raw manifest data on sync In case the data field was not properly populated during sync, we download it again and save it to the database. (cherry picked from commit fb5060543c6cba6e68952b6d9ea4b029f5132410) --- CHANGES/+sync_repair_manifest_data.bugfix | 1 + pulp_container/app/tasks/sync_stages.py | 45 ++++++++++++++++++----- 2 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 CHANGES/+sync_repair_manifest_data.bugfix diff --git a/CHANGES/+sync_repair_manifest_data.bugfix b/CHANGES/+sync_repair_manifest_data.bugfix new file mode 100644 index 000000000..d8148d1f0 --- /dev/null +++ b/CHANGES/+sync_repair_manifest_data.bugfix @@ -0,0 +1 @@ +Add transparent repair code during Manifest sync to redownload the raw data in case it was lost during an upgrade. diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 9f572db96..d24e62bb4 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -98,7 +98,15 @@ async def _check_for_existing_manifest(self, head_manifest_task): digest=digest, pulp_domain=get_domain() ).afirst(): raw_text_data = manifest.data - content_data = json.loads(raw_text_data) + if raw_text_data is None: + # This situation allegedly happens on some upgrade paths. + # A migration should mark the field not Null eventually. At that point this + # workaround can be removed. 
+ content_data, raw_text_data, _ = await self._download_manifest_data(response.url) + manifest.data = raw_text_data + await manifest.asave() + else: + content_data = json.loads(raw_text_data) else: if not original_reference.startswith("sha256:") and digest: # Fetch the tag with its digest @@ -522,6 +530,8 @@ async def create_listed_manifest(self, manifest_data): """ digest = manifest_data["digest"] + # in oci-index spec, platform is an optional field + platform = manifest_data.get("platform", None) relative_url = "/v2/{name}/manifests/{digest}".format( name=self.remote.namespaced_upstream_name, digest=digest ) @@ -530,15 +540,30 @@ async def create_listed_manifest(self, manifest_data): if manifest := await Manifest.objects.filter( digest=digest, pulp_domain=get_domain() ).afirst(): - content_data = json.loads(manifest.data) - - content_data, manifest = await self._download_and_instantiate_manifest(manifest_url, digest) - - # in oci-index spec, platform is an optional field - platform = manifest_data.get("platform", None) - if platform: - manifest.os = platform["os"] - manifest.architecture = platform["architecture"] + raw_text_data = manifest.data + if raw_text_data is None: + # This situation allegedly happens on some upgrade paths. + # A migration should mark the field not Null eventually. At that point this + # workaround can be removed. 
+ content_data, raw_text_data, response = await self._download_manifest_data( + manifest_url + ) + media_type = determine_media_type(content_data, response) + validate_manifest(content_data, media_type, digest) + manifest.data = raw_text_data + if platform: + manifest.os = platform["os"] + manifest.architecture = platform["architecture"] + await manifest.asave() + else: + content_data = json.loads(raw_text_data) + else: + content_data, manifest = await self._download_and_instantiate_manifest( + manifest_url, digest + ) + if platform: + manifest.os = platform["os"] + manifest.architecture = platform["architecture"] man_dc = DeclarativeContent(content=manifest) return {"manifest_dc": man_dc, "platform": platform, "content_data": content_data}