From 4c4dff3f219ab639e0953772b14abc94ca13ff91 Mon Sep 17 00:00:00 2001 From: Matthias Dellweg Date: Tue, 2 Jun 2026 16:28:54 +0200 Subject: [PATCH] Add resilient repair for raw manifest data on sync In case the data field was not properly populated during sync, we download it again and save it to the database. (cherry picked from commit fb5060543c6cba6e68952b6d9ea4b029f5132410) --- CHANGES/+sync_repair_manifest_data.bugfix | 1 + pulp_container/app/tasks/sync_stages.py | 45 ++++++++++++++++++----- 2 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 CHANGES/+sync_repair_manifest_data.bugfix diff --git a/CHANGES/+sync_repair_manifest_data.bugfix b/CHANGES/+sync_repair_manifest_data.bugfix new file mode 100644 index 000000000..d8148d1f0 --- /dev/null +++ b/CHANGES/+sync_repair_manifest_data.bugfix @@ -0,0 +1 @@ +Add transparent repair code during Manifest sync to redownload the raw data in case it was lost during an upgrade. diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 244b8ba52..340590e49 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -87,7 +87,15 @@ async def _check_for_existing_manifest(self, download_tag): digest=digest, pulp_domain=get_domain() ).afirst(): raw_text_data = manifest.data - content_data = json.loads(raw_text_data) + if raw_text_data is None: + # This situation allegedly happens on some upgrade paths. + # A migration should mark the field not Null eventually. At that point this + # workaround can be removed. 
+ content_data, raw_text_data, _ = await self._download_manifest_data(response.url) + manifest.data = raw_text_data + await manifest.asave() + else: + content_data = json.loads(raw_text_data) else: content_data, raw_text_data, response = await self._download_manifest_data(response.url) @@ -502,6 +510,8 @@ async def create_listed_manifest(self, manifest_data): """ digest = manifest_data["digest"] + # in oci-index spec, platform is an optional field + platform = manifest_data.get("platform", None) relative_url = "/v2/{name}/manifests/{digest}".format( name=self.remote.namespaced_upstream_name, digest=digest ) @@ -510,15 +520,30 @@ async def create_listed_manifest(self, manifest_data): if manifest := await Manifest.objects.filter( digest=digest, pulp_domain=get_domain() ).afirst(): - content_data = json.loads(manifest.data) - - content_data, manifest = await self._download_and_instantiate_manifest(manifest_url, digest) - - # in oci-index spec, platform is an optional field - platform = manifest_data.get("platform", None) - if platform: - manifest.os = platform["os"] - manifest.architecture = platform["architecture"] + raw_text_data = manifest.data + if raw_text_data is None: + # This situation allegedly happens on some upgrade paths. + # A migration should mark the field not Null eventually. At that point this + # workaround can be removed. 
+ content_data, raw_text_data, response = await self._download_manifest_data( + manifest_url + ) + media_type = determine_media_type(content_data, response) + validate_manifest(content_data, media_type, digest) + manifest.data = raw_text_data + if platform: + manifest.os = platform["os"] + manifest.architecture = platform["architecture"] + await manifest.asave() + else: + content_data = json.loads(raw_text_data) + else: + content_data, manifest = await self._download_and_instantiate_manifest( + manifest_url, digest + ) + if platform: + manifest.os = platform["os"] + manifest.architecture = platform["architecture"] man_dc = DeclarativeContent(content=manifest) return {"manifest_dc": man_dc, "platform": platform, "content_data": content_data}