From e02b6060da48ed036684972d16d226e64ab6bb59 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Mon, 6 Oct 2025 23:24:17 -0400 Subject: [PATCH] Use a correct package EVR sort closes #4124 (cherry picked from commit c7bbe48ba247518fe0cd5e840ec2357811fe49bb) --- CHANGES/4124.bugfix | 1 + pulp_rpm/app/models/package.py | 31 +----------- pulp_rpm/app/models/repository.py | 10 ++-- pulp_rpm/app/shared_utils.py | 44 +++++++++++++++++ pulp_rpm/app/tasks/copy.py | 5 +- pulp_rpm/app/tasks/prune.py | 3 +- pulp_rpm/tests/unit/test_package_age.py | 66 ++++++++++++------------- 7 files changed, 86 insertions(+), 74 deletions(-) create mode 100644 CHANGES/4124.bugfix diff --git a/CHANGES/4124.bugfix b/CHANGES/4124.bugfix new file mode 100644 index 0000000000..e1da99ee6c --- /dev/null +++ b/CHANGES/4124.bugfix @@ -0,0 +1 @@ +Fix a bug where the `retain_package_versions` feature (and prune and a few others) was not correctly determining which packages were most recent. diff --git a/pulp_rpm/app/models/package.py b/pulp_rpm/app/models/package.py index 781df2d581..e48a7563e4 100644 --- a/pulp_rpm/app/models/package.py +++ b/pulp_rpm/app/models/package.py @@ -4,10 +4,8 @@ from django.conf import settings from django.db import models -from django.db.models import Window, F -from django.db.models.functions import RowNumber -from pulpcore.plugin.models import Content, ContentManager +from pulpcore.plugin.models import Content from pulpcore.plugin.util import get_domain_pk from pulp_rpm.app.constants import ( @@ -34,31 +32,6 @@ def db_type(self, connection): return "pulp_evr_t" -class PackageManager(ContentManager): - """Custom Package object manager.""" - - def with_age(self): - """Provide an "age" score for each Package object in the queryset. - - Annotate the Package objects with an "age". Age is calculated with a postgresql - window function which partitions the Packages by name and architecture, orders the - packages in each group by 'evr', and returns the row number of each package, which - is the relative "age" within the group. The newest package gets age=1, second newest - age=2, and so on. - - A second partition by architecture is important because there can be packages with - the same name and verison numbers but they are not interchangeable because they have - differing arch, such as 'x86_64' and 'i686', or 'src' (SRPM) and any other arch. - """ - return self.annotate( - age=Window( - expression=RowNumber(), - partition_by=[F("name"), F("arch")], - order_by=F("evr").desc(), - ) - ) - - class Package(Content): """ The "Package" content type. Formerly "rpm" in Pulp 2. @@ -155,8 +128,6 @@ class Package(Content): attribute in the primary XML. """ - objects = PackageManager() - PROTECTED_FROM_RECLAIM = False TYPE = "package" diff --git a/pulp_rpm/app/models/repository.py b/pulp_rpm/app/models/repository.py index 263b0e7ca3..0955dbd343 100644 --- a/pulp_rpm/app/models/repository.py +++ b/pulp_rpm/app/models/repository.py @@ -47,7 +47,7 @@ RpmPackageSigningService, UpdateRecord, ) -from pulp_rpm.app.shared_utils import urlpath_sanitize +from pulp_rpm.app.shared_utils import urlpath_sanitize, annotate_with_age log = getLogger(__name__) @@ -454,13 +454,11 @@ def _apply_retention_policy(self, new_version): # django-managed and would be difficult to share. # # Instead we have to do the filtering manually. - nonmodular_packages = ( - Package.objects.with_age() - .filter( + nonmodular_packages = annotate_with_age( + Package.objects.filter( pk__in=new_version.content.filter(pulp_type=Package.get_pulp_type()), is_modular=False, # don't want to filter out modular RPMs - ) - .only("pk") + ).only("pk") ) old_packages = [] diff --git a/pulp_rpm/app/shared_utils.py b/pulp_rpm/app/shared_utils.py index dd0ca6e229..9ca5311314 100644 --- a/pulp_rpm/app/shared_utils.py +++ b/pulp_rpm/app/shared_utils.py @@ -4,12 +4,56 @@ import typing as t from hashlib import sha256 from pathlib import Path +from collections import defaultdict import createrepo_c as cr from django.conf import settings from django.utils.dateparse import parse_datetime from importlib_resources import files from pulpcore.plugin.exceptions import InvalidSignatureError +from pulp_rpm.app.rpm_version import RpmVersion + + +def annotate_with_age(qs): + """Provide an "age" score for each Package object in the queryset. + + Annotate the Package objects with an "age". Age is calculated by partitioning the + Packages by name and architecture and ordering the packages in each group by 'evr', + which is the relative "age" within the group. The newest package gets age=1, second + newest age=2, and so on. + + A second partition by architecture is important because there can be packages with + the same name and version numbers but they are not interchangeable because they have + differing arch, such as 'x86_64' and 'i686', or 'src' (SRPM) and any other arch. + """ + # Get packages in current queryset with their basic info + packages = list(qs.values("pk", "name", "arch", "epoch", "version", "release")) + + # Group packages by name and arch + groups = defaultdict(list) + for pkg in packages: + key = (pkg["name"], pkg["arch"]) + groups[key].append(pkg) + + # Calculate age for each group + age_mapping = {} + for group_packages in groups.values(): + # Sort by EVR (newest first) + group_packages.sort( + key=lambda p: RpmVersion(p["epoch"], p["version"], p["release"]), reverse=True + ) + + # Assign ages (1 = newest, 2 = second newest, etc.) + for age, pkg in enumerate(group_packages, 1): + age_mapping[pkg["pk"]] = age + + # Create a queryset with age annotation + # We'll use a CASE statement to map PKs to ages + from django.db.models import Case, When, IntegerField + + when_clauses = [When(pk=pk, then=age) for pk, age in age_mapping.items()] + + return qs.annotate(age=Case(*when_clauses, output_field=IntegerField())) def format_nevra(name=None, epoch=0, version=None, release=None, arch=None): diff --git a/pulp_rpm/app/tasks/copy.py b/pulp_rpm/app/tasks/copy.py index 397981bd0f..82e667f2a8 100644 --- a/pulp_rpm/app/tasks/copy.py +++ b/pulp_rpm/app/tasks/copy.py @@ -14,6 +14,7 @@ RpmRepository, Modulemd, ) +from pulp_rpm.app.shared_utils import annotate_with_age def find_children_of_content(content, src_repo_version): @@ -111,8 +112,8 @@ def find_children_of_content(content, src_repo_version): missing_package_names = packagegroup_package_names - set(existing_package_names) - needed_packages = Package.objects.with_age().filter( - name__in=missing_package_names, pk__in=src_repo_version.content + needed_packages = annotate_with_age( + Package.objects.filter(name__in=missing_package_names, pk__in=src_repo_version.content) ) # Pick the latest version of each package available which isn't already present diff --git a/pulp_rpm/app/tasks/prune.py b/pulp_rpm/app/tasks/prune.py index 4489988f8f..58a53bb67d 100644 --- a/pulp_rpm/app/tasks/prune.py +++ b/pulp_rpm/app/tasks/prune.py @@ -15,6 +15,7 @@ from pulpcore.plugin.tasking import dispatch from pulp_rpm.app.models.package import Package from pulp_rpm.app.models.repository import RpmRepository +from pulp_rpm.app.shared_utils import annotate_with_age log = getLogger(__name__) @@ -36,7 +37,7 @@ def prune_repo_packages(repo_pk, keep_days, dry_run): # We only care about RPM-Names that have more than one EVRA - "singles" are always kept. rpm_by_name_age = ( - curr_vers.get_content(Package.objects.with_age()) + annotate_with_age(curr_vers.get_content(Package.objects)) .filter(age__gt=1) .order_by("name", "epoch", "version", "release", "arch") .values("pk") diff --git a/pulp_rpm/tests/unit/test_package_age.py b/pulp_rpm/tests/unit/test_package_age.py index cb20103486..792f4dc222 100644 --- a/pulp_rpm/tests/unit/test_package_age.py +++ b/pulp_rpm/tests/unit/test_package_age.py @@ -1,11 +1,11 @@ """ -Unit tests for Package.objects.with_age() functionality. +Unit tests for Package age calculation functionality. """ from django.test import TestCase -from unittest import skip from pulp_rpm.app.models import Package +from pulp_rpm.app.shared_utils import annotate_with_age class TestPackageAge(TestCase): @@ -99,7 +99,9 @@ def setUp(self): def test_age_calculation_basic_versions(self): """Test that age is calculated correctly for basic version differences.""" - packages = Package.objects.with_age().filter(name="testpkg", arch="x86_64").order_by("age") + packages = annotate_with_age( + Package.objects.filter(name="testpkg", arch="x86_64") + ).order_by("age") # Expected order: 2.0.0 (age=1), 1.5.0-2 (age=2), 1.0.0 (age=3) self.assertEqual(packages.count(), 3) @@ -123,18 +125,18 @@ def test_age_calculation_basic_versions(self): def test_age_calculation_different_architectures(self): """Test that packages with different architectures are aged separately.""" # x86_64 packages - x86_packages = ( - Package.objects.with_age().filter(name="testpkg", arch="x86_64").order_by("age") - ) + x86_packages = annotate_with_age( + Package.objects.filter(name="testpkg", arch="x86_64") + ).order_by("age") self.assertEqual(x86_packages.count(), 3) self.assertEqual(x86_packages[0].age, 1) # 2.0.0 self.assertEqual(x86_packages[1].age, 2) # 1.5.0-2 self.assertEqual(x86_packages[2].age, 3) # 1.0.0 # i686 packages - i686_packages = ( - Package.objects.with_age().filter(name="testpkg", arch="i686").order_by("age") - ) + i686_packages = annotate_with_age( + Package.objects.filter(name="testpkg", arch="i686") + ).order_by("age") self.assertEqual(i686_packages.count(), 2) # Even though i686 3.0.0 is newer than any x86_64 version, it should still have age=1 @@ -147,13 +149,13 @@ def test_age_calculation_different_architectures(self): def test_age_calculation_different_names(self): """Test that packages with different names are aged separately.""" # Different package name should have its own age calculation - other_packages = Package.objects.with_age().filter(name="otherpkg") + other_packages = annotate_with_age(Package.objects.filter(name="otherpkg")) self.assertEqual(other_packages.count(), 1) self.assertEqual(other_packages[0].age, 1) def test_age_calculation_with_epochs(self): """Test that epoch is considered in age calculation.""" - epoch_packages = Package.objects.with_age().filter(name="epochpkg").order_by("age") + epoch_packages = annotate_with_age(Package.objects.filter(name="epochpkg")).order_by("age") self.assertEqual(epoch_packages.count(), 2) # Epoch 1 should be newer than epoch 0, regardless of version numbers @@ -169,7 +171,7 @@ def test_age_calculation_with_epochs(self): def test_age_all_packages(self): """Test age calculation when querying all packages.""" - all_packages = Package.objects.with_age() + all_packages = annotate_with_age(Package.objects.all()) # Verify each package has an age for pkg in all_packages: @@ -224,7 +226,7 @@ def test_age_with_release_differences(self): checksum_type="sha256", ) - packages = Package.objects.with_age().filter(name="relpkg").order_by("age") + packages = annotate_with_age(Package.objects.filter(name="relpkg")).order_by("age") # Expected order: 10.el8 > 2.el8 > 1.el8 (numeric comparison of release) self.assertEqual(packages[0].release, "10.el8") # age=1 @@ -236,7 +238,6 @@ def test_age_with_release_differences(self): self.assertEqual(packages[2].release, "1.el8") # age=3 self.assertEqual(packages[2].age, 3) - @skip("The implementation of package age is broken w/r/t '^' and '~' characters") def test_age_with_tilde_and_caret_versions(self): """Test age calculation with tilde and caret version characters.""" # Create packages with tilde and caret versions @@ -280,7 +281,7 @@ def test_age_with_tilde_and_caret_versions(self): checksum_type="sha256", ) - packages = Package.objects.with_age().filter(name="tildepkg").order_by("age") + packages = annotate_with_age(Package.objects.filter(name="tildepkg")).order_by("age") # Expected order: 1.0^git123 (age=1), 1.0 (age=2), 1.0~rc2 (age=3), 1.0~rc1 (age=4) # Caret sorts higher than regular, regular sorts higher than tilde @@ -349,7 +350,7 @@ def test_age_with_numeric_handling_versions(self): checksum_type="sha256", ) - packages = Package.objects.with_age().filter(name="numpkg").order_by("age") + packages = annotate_with_age(Package.objects.filter(name="numpkg")).order_by("age") # Expected order: 202405210 > 20240521 > 10.10001 > 10.1 == 10.0001 # Leading zeros are ignored in numeric segments @@ -401,7 +402,7 @@ def test_age_with_non_intuitive_comparison_versions(self): checksum_type="sha256", ) - packages = Package.objects.with_age().filter(name="intuitivepkg").order_by("age") + packages = annotate_with_age(Package.objects.filter(name="intuitivepkg")).order_by("age") # Expected order: 1g.fc33 > 1.fc33 > 1e.fc33 # 'e' comes before numeric in comparison, 'g' comes after numeric @@ -467,7 +468,7 @@ def test_age_with_non_alphanumeric_equivalence_versions(self): checksum_type="sha256", ) - packages = Package.objects.with_age().filter(name="alphapkg").order_by("age") + packages = annotate_with_age(Package.objects.filter(name="alphapkg")).order_by("age") # Expected behavior: 4.999.9 > 4.999 > 4.0 == 4_0 == 4+0 # Non-alphanumeric characters are treated as equivalent separators @@ -484,7 +485,6 @@ def test_age_with_non_alphanumeric_equivalence_versions(self): self.assertIn("4_0", remaining_versions) self.assertIn("4+0", remaining_versions) - @skip("Non-ASCII character handling do not work correctly with current implementation") def test_age_with_non_ascii_character_versions(self): """Test age calculation with non-ASCII character handling.""" # Create packages with non-ASCII characters @@ -528,7 +528,7 @@ def test_age_with_non_ascii_character_versions(self): checksum_type="sha256", ) - packages = Package.objects.with_age().filter(name="asciipkg").order_by("age") + packages = annotate_with_age(Package.objects.filter(name="asciipkg")).order_by("age") # Expected behavior: 1.11 > 1.1.1 == 1.1.Á.1 > 1.1Á1 # Non-ASCII chars are ignored unless they break up alphanumeric sequences @@ -586,7 +586,7 @@ def test_age_with_mixed_scenarios(self): checksum_type="sha256", ) - packages = Package.objects.with_age().filter(name="mixedpkg").order_by("age") + packages = annotate_with_age(Package.objects.filter(name="mixedpkg")).order_by("age") # Expected order: 1:1.0 (epoch wins) > 2.0.0 > 2.0 > 2.0.rc1 self.assertEqual(packages[0].epoch, "1") @@ -634,11 +634,9 @@ def test_age_with_filtered_queryset(self): ) # Test with filtered queryset (only older versions) - filtered_packages = ( - Package.objects.with_age() - .filter(name="filterpkg", version__in=["1.0", "2.0"]) - .order_by("age") - ) + filtered_packages = annotate_with_age( + Package.objects.filter(name="filterpkg", version__in=["1.0", "2.0"]) + ).order_by("age") self.assertEqual(filtered_packages.count(), 2) # In the filtered set, 2.0 should be newest (age=1), 1.0 should be oldest (age=2) @@ -649,7 +647,7 @@ def test_age_with_filtered_queryset(self): def test_age_with_empty_queryset(self): """Test age calculation with empty queryset.""" - empty_packages = Package.objects.with_age().filter(name="nonexistent") + empty_packages = annotate_with_age(Package.objects.filter(name="nonexistent")) self.assertEqual(len(empty_packages.all()), 0) def test_age_with_mixed_filtering_scenarios(self): @@ -679,11 +677,9 @@ def test_age_with_mixed_filtering_scenarios(self): ) # Test 1: Filter to get partial group from first package set - partial_group = ( - Package.objects.with_age() - .filter(name="grouppkg", version__in=["2.0", "3.0", "4.0"]) - .order_by("age") - ) + partial_group = annotate_with_age( + Package.objects.filter(name="grouppkg", version__in=["2.0", "3.0", "4.0"]) + ).order_by("age") self.assertEqual(partial_group.count(), 3) # In this filtered set: 4.0 (age=1), 3.0 (age=2), 2.0 (age=3) @@ -695,8 +691,8 @@ def test_age_with_mixed_filtering_scenarios(self): self.assertEqual(partial_group[2].age, 3) # Test 2: Filter across multiple package groups - cross_group = Package.objects.with_age().filter( - name__in=["grouppkg", "otherpkg"], version="2.0" + cross_group = annotate_with_age( + Package.objects.filter(name__in=["grouppkg", "otherpkg"], version="2.0") ) # Should have one package from each group, both with age=1 in their respective groups @@ -728,7 +724,7 @@ def test_age_consistency_with_retention_scenario(self): created_packages.append(pkg) # Test: Get packages and apply age - this should match what retention logic does - all_packages = Package.objects.with_age().filter(name="retentiontestpkg") + all_packages = annotate_with_age(Package.objects.filter(name="retentiontestpkg")) # Verify all packages have correct age values for pkg in all_packages: