-
Notifications
You must be signed in to change notification settings - Fork 131
Use a correct EVR sort #4136
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use a correct EVR sort #4136
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Fix a bug where the `retain_package_versions` feature (and prune and a few others) was not correctly determining which packages were most recent. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,12 +4,56 @@ | |
| import typing as t | ||
| from hashlib import sha256 | ||
| from pathlib import Path | ||
| from collections import defaultdict | ||
|
|
||
| import createrepo_c as cr | ||
| from django.conf import settings | ||
| from django.utils.dateparse import parse_datetime | ||
| from importlib_resources import files | ||
| from pulpcore.plugin.exceptions import InvalidSignatureError | ||
| from pulp_rpm.app.rpm_version import RpmVersion | ||
|
|
||
|
|
||
| def annotate_with_age(qs): | ||
| """Provide an "age" score for each Package object in the queryset. | ||
|
|
||
| Annotate the Package objects with an "age". Age is calculated by partitioning the | ||
| Packages by name and architecture and ordering the packages in each group by 'evr', | ||
| which is the relative "age" within the group. The newest package gets age=1, second | ||
| newest age=2, and so on. | ||
|
|
||
| A second partition by architecture is important because there can be packages with | ||
| the same name and version numbers but they are not interchangeable because they have | ||
| differing arch, such as 'x86_64' and 'i686', or 'src' (SRPM) and any other arch. | ||
| """ | ||
| # Get packages in current queryset with their basic info | ||
| packages = list(qs.values("pk", "name", "arch", "epoch", "version", "release")) | ||
|
|
||
| # Group packages by name and arch | ||
| groups = defaultdict(list) | ||
| for pkg in packages: | ||
| key = (pkg["name"], pkg["arch"]) | ||
| groups[key].append(pkg) | ||
|
|
||
| # Calculate age for each group | ||
| age_mapping = {} | ||
| for group_packages in groups.values(): | ||
| # Sort by EVR (newest first) | ||
| group_packages.sort( | ||
| key=lambda p: RpmVersion(p["epoch"], p["version"], p["release"]), reverse=True | ||
| ) | ||
|
|
||
| # Assign ages (1 = newest, 2 = second newest, etc.) | ||
| for age, pkg in enumerate(group_packages, 1): | ||
| age_mapping[pkg["pk"]] = age | ||
|
|
||
| # Create a queryset with age annotation | ||
| # We'll use a CASE statement to map PKs to ages | ||
| from django.db.models import Case, When, IntegerField | ||
|
|
||
| when_clauses = [When(pk=pk, then=age) for pk, age in age_mapping.items()] | ||
|
|
||
| return qs.annotate(age=Case(*when_clauses, output_field=IntegerField())) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As mentioned previously I don't really love this, but it should be "fine" and hopefully it's just a temporary implementation
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remind us again why this is a temporary implementation?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because ideally we have a fix in the database layer that can be shared with Katello. But that's not backportable. |
||
|
|
||
|
|
||
| def format_nevra(name=None, epoch=0, version=None, release=None, arch=None): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,11 +1,11 @@ | ||
| """ | ||
| Unit tests for Package.objects.with_age() functionality. | ||
| Unit tests for Package age calculation functionality. | ||
| """ | ||
|
|
||
| from django.test import TestCase | ||
| from unittest import skip | ||
|
|
||
| from pulp_rpm.app.models import Package | ||
| from pulp_rpm.app.shared_utils import annotate_with_age | ||
|
|
||
|
|
||
| class TestPackageAge(TestCase): | ||
|
|
@@ -99,7 +99,9 @@ def setUp(self): | |
|
|
||
| def test_age_calculation_basic_versions(self): | ||
| """Test that age is calculated correctly for basic version differences.""" | ||
| packages = Package.objects.with_age().filter(name="testpkg", arch="x86_64").order_by("age") | ||
| packages = annotate_with_age( | ||
| Package.objects.filter(name="testpkg", arch="x86_64") | ||
| ).order_by("age") | ||
|
|
||
| # Expected order: 2.0.0 (age=1), 1.5.0-2 (age=2), 1.0.0 (age=3) | ||
| self.assertEqual(packages.count(), 3) | ||
|
|
@@ -123,18 +125,18 @@ def test_age_calculation_basic_versions(self): | |
| def test_age_calculation_different_architectures(self): | ||
| """Test that packages with different architectures are aged separately.""" | ||
| # x86_64 packages | ||
| x86_packages = ( | ||
| Package.objects.with_age().filter(name="testpkg", arch="x86_64").order_by("age") | ||
| ) | ||
| x86_packages = annotate_with_age( | ||
| Package.objects.filter(name="testpkg", arch="x86_64") | ||
| ).order_by("age") | ||
| self.assertEqual(x86_packages.count(), 3) | ||
| self.assertEqual(x86_packages[0].age, 1) # 2.0.0 | ||
| self.assertEqual(x86_packages[1].age, 2) # 1.5.0-2 | ||
| self.assertEqual(x86_packages[2].age, 3) # 1.0.0 | ||
|
|
||
| # i686 packages | ||
| i686_packages = ( | ||
| Package.objects.with_age().filter(name="testpkg", arch="i686").order_by("age") | ||
| ) | ||
| i686_packages = annotate_with_age( | ||
| Package.objects.filter(name="testpkg", arch="i686") | ||
| ).order_by("age") | ||
| self.assertEqual(i686_packages.count(), 2) | ||
|
|
||
| # Even though i686 3.0.0 is newer than any x86_64 version, it should still have age=1 | ||
|
|
@@ -147,13 +149,13 @@ def test_age_calculation_different_architectures(self): | |
| def test_age_calculation_different_names(self): | ||
| """Test that packages with different names are aged separately.""" | ||
| # Different package name should have its own age calculation | ||
| other_packages = Package.objects.with_age().filter(name="otherpkg") | ||
| other_packages = annotate_with_age(Package.objects.filter(name="otherpkg")) | ||
| self.assertEqual(other_packages.count(), 1) | ||
| self.assertEqual(other_packages[0].age, 1) | ||
|
|
||
| def test_age_calculation_with_epochs(self): | ||
| """Test that epoch is considered in age calculation.""" | ||
| epoch_packages = Package.objects.with_age().filter(name="epochpkg").order_by("age") | ||
| epoch_packages = annotate_with_age(Package.objects.filter(name="epochpkg")).order_by("age") | ||
| self.assertEqual(epoch_packages.count(), 2) | ||
|
|
||
| # Epoch 1 should be newer than epoch 0, regardless of version numbers | ||
|
|
@@ -169,7 +171,7 @@ def test_age_calculation_with_epochs(self): | |
|
|
||
| def test_age_all_packages(self): | ||
| """Test age calculation when querying all packages.""" | ||
| all_packages = Package.objects.with_age() | ||
| all_packages = annotate_with_age(Package.objects.all()) | ||
|
|
||
| # Verify each package has an age | ||
| for pkg in all_packages: | ||
|
|
@@ -224,7 +226,7 @@ def test_age_with_release_differences(self): | |
| checksum_type="sha256", | ||
| ) | ||
|
|
||
| packages = Package.objects.with_age().filter(name="relpkg").order_by("age") | ||
| packages = annotate_with_age(Package.objects.filter(name="relpkg")).order_by("age") | ||
|
|
||
| # Expected order: 10.el8 > 2.el8 > 1.el8 (numeric comparison of release) | ||
| self.assertEqual(packages[0].release, "10.el8") # age=1 | ||
|
|
@@ -236,7 +238,6 @@ def test_age_with_release_differences(self): | |
| self.assertEqual(packages[2].release, "1.el8") # age=3 | ||
| self.assertEqual(packages[2].age, 3) | ||
|
|
||
| @skip("The implementation of package age is broken w/r/t '^' and '~' characters") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test passes now |
||
| def test_age_with_tilde_and_caret_versions(self): | ||
| """Test age calculation with tilde and caret version characters.""" | ||
| # Create packages with tilde and caret versions | ||
|
|
@@ -280,7 +281,7 @@ def test_age_with_tilde_and_caret_versions(self): | |
| checksum_type="sha256", | ||
| ) | ||
|
|
||
| packages = Package.objects.with_age().filter(name="tildepkg").order_by("age") | ||
| packages = annotate_with_age(Package.objects.filter(name="tildepkg")).order_by("age") | ||
|
|
||
| # Expected order: 1.0^git123 (age=1), 1.0 (age=2), 1.0~rc2 (age=3), 1.0~rc1 (age=4) | ||
| # Caret sorts higher than regular, regular sorts higher than tilde | ||
|
|
@@ -349,7 +350,7 @@ def test_age_with_numeric_handling_versions(self): | |
| checksum_type="sha256", | ||
| ) | ||
|
|
||
| packages = Package.objects.with_age().filter(name="numpkg").order_by("age") | ||
| packages = annotate_with_age(Package.objects.filter(name="numpkg")).order_by("age") | ||
|
|
||
| # Expected order: 202405210 > 20240521 > 10.10001 > 10.1 == 10.0001 | ||
| # Leading zeros are ignored in numeric segments | ||
|
|
@@ -401,7 +402,7 @@ def test_age_with_non_intuitive_comparison_versions(self): | |
| checksum_type="sha256", | ||
| ) | ||
|
|
||
| packages = Package.objects.with_age().filter(name="intuitivepkg").order_by("age") | ||
| packages = annotate_with_age(Package.objects.filter(name="intuitivepkg")).order_by("age") | ||
|
|
||
| # Expected order: 1g.fc33 > 1.fc33 > 1e.fc33 | ||
| # 'e' comes before numeric in comparison, 'g' comes after numeric | ||
|
|
@@ -467,7 +468,7 @@ def test_age_with_non_alphanumeric_equivalence_versions(self): | |
| checksum_type="sha256", | ||
| ) | ||
|
|
||
| packages = Package.objects.with_age().filter(name="alphapkg").order_by("age") | ||
| packages = annotate_with_age(Package.objects.filter(name="alphapkg")).order_by("age") | ||
|
|
||
| # Expected behavior: 4.999.9 > 4.999 > 4.0 == 4_0 == 4+0 | ||
| # Non-alphanumeric characters are treated as equivalent separators | ||
|
|
@@ -484,7 +485,6 @@ def test_age_with_non_alphanumeric_equivalence_versions(self): | |
| self.assertIn("4_0", remaining_versions) | ||
| self.assertIn("4+0", remaining_versions) | ||
|
|
||
| @skip("Non-ASCII character handling do not work correctly with current implementation") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test passes now |
||
| def test_age_with_non_ascii_character_versions(self): | ||
| """Test age calculation with non-ASCII character handling.""" | ||
| # Create packages with non-ASCII characters | ||
|
|
@@ -528,7 +528,7 @@ def test_age_with_non_ascii_character_versions(self): | |
| checksum_type="sha256", | ||
| ) | ||
|
|
||
| packages = Package.objects.with_age().filter(name="asciipkg").order_by("age") | ||
| packages = annotate_with_age(Package.objects.filter(name="asciipkg")).order_by("age") | ||
|
|
||
| # Expected behavior: 1.11 > 1.1.1 == 1.1.Á.1 > 1.1Á1 | ||
| # Non-ASCII chars are ignored unless they break up alphanumeric sequences | ||
|
|
@@ -586,7 +586,7 @@ def test_age_with_mixed_scenarios(self): | |
| checksum_type="sha256", | ||
| ) | ||
|
|
||
| packages = Package.objects.with_age().filter(name="mixedpkg").order_by("age") | ||
| packages = annotate_with_age(Package.objects.filter(name="mixedpkg")).order_by("age") | ||
|
|
||
| # Expected order: 1:1.0 (epoch wins) > 2.0.0 > 2.0 > 2.0.rc1 | ||
| self.assertEqual(packages[0].epoch, "1") | ||
|
|
@@ -634,11 +634,9 @@ def test_age_with_filtered_queryset(self): | |
| ) | ||
|
|
||
| # Test with filtered queryset (only older versions) | ||
| filtered_packages = ( | ||
| Package.objects.with_age() | ||
| .filter(name="filterpkg", version__in=["1.0", "2.0"]) | ||
| .order_by("age") | ||
| ) | ||
| filtered_packages = annotate_with_age( | ||
| Package.objects.filter(name="filterpkg", version__in=["1.0", "2.0"]) | ||
| ).order_by("age") | ||
|
|
||
| self.assertEqual(filtered_packages.count(), 2) | ||
| # In the filtered set, 2.0 should be newest (age=1), 1.0 should be oldest (age=2) | ||
|
|
@@ -649,7 +647,7 @@ def test_age_with_filtered_queryset(self): | |
|
|
||
| def test_age_with_empty_queryset(self): | ||
| """Test age calculation with empty queryset.""" | ||
| empty_packages = Package.objects.with_age().filter(name="nonexistent") | ||
| empty_packages = annotate_with_age(Package.objects.filter(name="nonexistent")) | ||
| self.assertEqual(len(empty_packages.all()), 0) | ||
|
|
||
| def test_age_with_mixed_filtering_scenarios(self): | ||
|
|
@@ -679,11 +677,9 @@ def test_age_with_mixed_filtering_scenarios(self): | |
| ) | ||
|
|
||
| # Test 1: Filter to get partial group from first package set | ||
| partial_group = ( | ||
| Package.objects.with_age() | ||
| .filter(name="grouppkg", version__in=["2.0", "3.0", "4.0"]) | ||
| .order_by("age") | ||
| ) | ||
| partial_group = annotate_with_age( | ||
| Package.objects.filter(name="grouppkg", version__in=["2.0", "3.0", "4.0"]) | ||
| ).order_by("age") | ||
|
|
||
| self.assertEqual(partial_group.count(), 3) | ||
| # In this filtered set: 4.0 (age=1), 3.0 (age=2), 2.0 (age=3) | ||
|
|
@@ -695,8 +691,8 @@ def test_age_with_mixed_filtering_scenarios(self): | |
| self.assertEqual(partial_group[2].age, 3) | ||
|
|
||
| # Test 2: Filter across multiple package groups | ||
| cross_group = Package.objects.with_age().filter( | ||
| name__in=["grouppkg", "otherpkg"], version="2.0" | ||
| cross_group = annotate_with_age( | ||
| Package.objects.filter(name__in=["grouppkg", "otherpkg"], version="2.0") | ||
| ) | ||
|
|
||
| # Should have one package from each group, both with age=1 in their respective groups | ||
|
|
@@ -728,7 +724,7 @@ def test_age_consistency_with_retention_scenario(self): | |
| created_packages.append(pkg) | ||
|
|
||
| # Test: Get packages and apply age - this should match what retention logic does | ||
| all_packages = Package.objects.with_age().filter(name="retentiontestpkg") | ||
| all_packages = annotate_with_age(Package.objects.filter(name="retentiontestpkg")) | ||
|
|
||
| # Verify all packages have correct age values | ||
| for pkg in all_packages: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The rationale for getting rid of this is that different implementations of
with_age()are not stable when applying additional filters to the queryset. You want the annotation to be the very last thing you do post-filtering to guarantee correctness.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also I recall running into issues because we didn't inherit from the Pulpcore-overridden content manager properly. Easier to avoid conflicts this way.