Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/4124.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug where the `retain_package_versions` feature (and prune and a few others) was not correctly determining which packages were most recent.
31 changes: 1 addition & 30 deletions pulp_rpm/app/models/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@

from django.conf import settings
from django.db import models
from django.db.models import Window, F
from django.db.models.functions import RowNumber

from pulpcore.plugin.models import Content, ContentManager
from pulpcore.plugin.models import Content
from pulpcore.plugin.util import get_domain_pk

from pulp_rpm.app.constants import (
Expand All @@ -34,31 +32,6 @@ def db_type(self, connection):
return "pulp_evr_t"


class PackageManager(ContentManager):
"""Custom Package object manager."""

def with_age(self):
"""Provide an "age" score for each Package object in the queryset.

Annotate the Package objects with an "age". Age is calculated with a postgresql
window function which partitions the Packages by name and architecture, orders the
packages in each group by 'evr', and returns the row number of each package, which
is the relative "age" within the group. The newest package gets age=1, second newest
age=2, and so on.

A second partition by architecture is important because there can be packages with
the same name and verison numbers but they are not interchangeable because they have
differing arch, such as 'x86_64' and 'i686', or 'src' (SRPM) and any other arch.
"""
return self.annotate(
age=Window(
expression=RowNumber(),
partition_by=[F("name"), F("arch")],
order_by=F("evr").desc(),
)
)


class Package(Content):
"""
The "Package" content type. Formerly "rpm" in Pulp 2.
Expand Down Expand Up @@ -155,8 +128,6 @@ class Package(Content):
attribute in the primary XML.
"""

objects = PackageManager()

PROTECTED_FROM_RECLAIM = False

TYPE = "package"
Expand Down
10 changes: 4 additions & 6 deletions pulp_rpm/app/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
RpmPackageSigningService,
UpdateRecord,
)
from pulp_rpm.app.shared_utils import urlpath_sanitize
from pulp_rpm.app.shared_utils import urlpath_sanitize, annotate_with_age

log = getLogger(__name__)

Expand Down Expand Up @@ -454,13 +454,11 @@ def _apply_retention_policy(self, new_version):
# django-managed and would be difficult to share.
#
# Instead we have to do the filtering manually.
nonmodular_packages = (
Package.objects.with_age()
.filter(
nonmodular_packages = annotate_with_age(
Package.objects.filter(
pk__in=new_version.content.filter(pulp_type=Package.get_pulp_type()),
is_modular=False, # don't want to filter out modular RPMs
)
.only("pk")
).only("pk")
)

old_packages = []
Expand Down
44 changes: 44 additions & 0 deletions pulp_rpm/app/shared_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,56 @@
import typing as t
from hashlib import sha256
from pathlib import Path
from collections import defaultdict

import createrepo_c as cr
from django.conf import settings
from django.utils.dateparse import parse_datetime
from importlib_resources import files
from pulpcore.plugin.exceptions import InvalidSignatureError
from pulp_rpm.app.rpm_version import RpmVersion


def annotate_with_age(qs):
"""Provide an "age" score for each Package object in the queryset.

Annotate the Package objects with an "age". Age is calculated by partitioning the
Packages by name and architecture and ordering the packages in each group by 'evr',
which is the relative "age" within the group. The newest package gets age=1, second
newest age=2, and so on.

A second partition by architecture is important because there can be packages with
the same name and version numbers but they are not interchangeable because they have
differing arch, such as 'x86_64' and 'i686', or 'src' (SRPM) and any other arch.
"""
# Get packages in current queryset with their basic info
packages = list(qs.values("pk", "name", "arch", "epoch", "version", "release"))

# Group packages by name and arch
groups = defaultdict(list)
for pkg in packages:
key = (pkg["name"], pkg["arch"])
groups[key].append(pkg)

# Calculate age for each group
age_mapping = {}
for group_packages in groups.values():
# Sort by EVR (newest first)
group_packages.sort(
key=lambda p: RpmVersion(p["epoch"], p["version"], p["release"]), reverse=True
)

# Assign ages (1 = newest, 2 = second newest, etc.)
for age, pkg in enumerate(group_packages, 1):
age_mapping[pkg["pk"]] = age

# Create a queryset with age annotation
# We'll use a CASE statement to map PKs to ages
from django.db.models import Case, When, IntegerField

when_clauses = [When(pk=pk, then=age) for pk, age in age_mapping.items()]

return qs.annotate(age=Case(*when_clauses, output_field=IntegerField()))


def format_nevra(name=None, epoch=0, version=None, release=None, arch=None):
Expand Down
5 changes: 3 additions & 2 deletions pulp_rpm/app/tasks/copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
RpmRepository,
Modulemd,
)
from pulp_rpm.app.shared_utils import annotate_with_age


def find_children_of_content(content, src_repo_version):
Expand Down Expand Up @@ -111,8 +112,8 @@ def find_children_of_content(content, src_repo_version):

missing_package_names = packagegroup_package_names - set(existing_package_names)

needed_packages = Package.objects.with_age().filter(
name__in=missing_package_names, pk__in=src_repo_version.content
needed_packages = annotate_with_age(
Package.objects.filter(name__in=missing_package_names, pk__in=src_repo_version.content)
)

# Pick the latest version of each package available which isn't already present
Expand Down
3 changes: 2 additions & 1 deletion pulp_rpm/app/tasks/prune.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pulpcore.plugin.tasking import dispatch
from pulp_rpm.app.models.package import Package
from pulp_rpm.app.models.repository import RpmRepository
from pulp_rpm.app.shared_utils import annotate_with_age

log = getLogger(__name__)

Expand All @@ -36,7 +37,7 @@ def prune_repo_packages(repo_pk, keep_days, dry_run):

# We only care about RPM-Names that have more than one EVRA - "singles" are always kept.
rpm_by_name_age = (
curr_vers.get_content(Package.objects.with_age())
annotate_with_age(curr_vers.get_content(Package.objects))
.filter(age__gt=1)
.order_by("name", "epoch", "version", "release", "arch")
.values("pk")
Expand Down
66 changes: 31 additions & 35 deletions pulp_rpm/tests/unit/test_package_age.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""
Unit tests for Package.objects.with_age() functionality.
Unit tests for Package age calculation functionality.
"""

from django.test import TestCase
from unittest import skip

from pulp_rpm.app.models import Package
from pulp_rpm.app.shared_utils import annotate_with_age


class TestPackageAge(TestCase):
Expand Down Expand Up @@ -99,7 +99,9 @@ def setUp(self):

def test_age_calculation_basic_versions(self):
"""Test that age is calculated correctly for basic version differences."""
packages = Package.objects.with_age().filter(name="testpkg", arch="x86_64").order_by("age")
packages = annotate_with_age(
Package.objects.filter(name="testpkg", arch="x86_64")
).order_by("age")

# Expected order: 2.0.0 (age=1), 1.5.0-2 (age=2), 1.0.0 (age=3)
self.assertEqual(packages.count(), 3)
Expand All @@ -123,18 +125,18 @@ def test_age_calculation_basic_versions(self):
def test_age_calculation_different_architectures(self):
"""Test that packages with different architectures are aged separately."""
# x86_64 packages
x86_packages = (
Package.objects.with_age().filter(name="testpkg", arch="x86_64").order_by("age")
)
x86_packages = annotate_with_age(
Package.objects.filter(name="testpkg", arch="x86_64")
).order_by("age")
self.assertEqual(x86_packages.count(), 3)
self.assertEqual(x86_packages[0].age, 1) # 2.0.0
self.assertEqual(x86_packages[1].age, 2) # 1.5.0-2
self.assertEqual(x86_packages[2].age, 3) # 1.0.0

# i686 packages
i686_packages = (
Package.objects.with_age().filter(name="testpkg", arch="i686").order_by("age")
)
i686_packages = annotate_with_age(
Package.objects.filter(name="testpkg", arch="i686")
).order_by("age")
self.assertEqual(i686_packages.count(), 2)

# Even though i686 3.0.0 is newer than any x86_64 version, it should still have age=1
Expand All @@ -147,13 +149,13 @@ def test_age_calculation_different_architectures(self):
def test_age_calculation_different_names(self):
"""Test that packages with different names are aged separately."""
# Different package name should have its own age calculation
other_packages = Package.objects.with_age().filter(name="otherpkg")
other_packages = annotate_with_age(Package.objects.filter(name="otherpkg"))
self.assertEqual(other_packages.count(), 1)
self.assertEqual(other_packages[0].age, 1)

def test_age_calculation_with_epochs(self):
"""Test that epoch is considered in age calculation."""
epoch_packages = Package.objects.with_age().filter(name="epochpkg").order_by("age")
epoch_packages = annotate_with_age(Package.objects.filter(name="epochpkg")).order_by("age")
self.assertEqual(epoch_packages.count(), 2)

# Epoch 1 should be newer than epoch 0, regardless of version numbers
Expand All @@ -169,7 +171,7 @@ def test_age_calculation_with_epochs(self):

def test_age_all_packages(self):
"""Test age calculation when querying all packages."""
all_packages = Package.objects.with_age()
all_packages = annotate_with_age(Package.objects.all())

# Verify each package has an age
for pkg in all_packages:
Expand Down Expand Up @@ -224,7 +226,7 @@ def test_age_with_release_differences(self):
checksum_type="sha256",
)

packages = Package.objects.with_age().filter(name="relpkg").order_by("age")
packages = annotate_with_age(Package.objects.filter(name="relpkg")).order_by("age")

# Expected order: 10.el8 > 2.el8 > 1.el8 (numeric comparison of release)
self.assertEqual(packages[0].release, "10.el8") # age=1
Expand All @@ -236,7 +238,6 @@ def test_age_with_release_differences(self):
self.assertEqual(packages[2].release, "1.el8") # age=3
self.assertEqual(packages[2].age, 3)

@skip("The implementation of package age is broken w/r/t '^' and '~' characters")
def test_age_with_tilde_and_caret_versions(self):
"""Test age calculation with tilde and caret version characters."""
# Create packages with tilde and caret versions
Expand Down Expand Up @@ -280,7 +281,7 @@ def test_age_with_tilde_and_caret_versions(self):
checksum_type="sha256",
)

packages = Package.objects.with_age().filter(name="tildepkg").order_by("age")
packages = annotate_with_age(Package.objects.filter(name="tildepkg")).order_by("age")

# Expected order: 1.0^git123 (age=1), 1.0 (age=2), 1.0~rc2 (age=3), 1.0~rc1 (age=4)
# Caret sorts higher than regular, regular sorts higher than tilde
Expand Down Expand Up @@ -349,7 +350,7 @@ def test_age_with_numeric_handling_versions(self):
checksum_type="sha256",
)

packages = Package.objects.with_age().filter(name="numpkg").order_by("age")
packages = annotate_with_age(Package.objects.filter(name="numpkg")).order_by("age")

# Expected order: 202405210 > 20240521 > 10.10001 > 10.1 == 10.0001
# Leading zeros are ignored in numeric segments
Expand Down Expand Up @@ -401,7 +402,7 @@ def test_age_with_non_intuitive_comparison_versions(self):
checksum_type="sha256",
)

packages = Package.objects.with_age().filter(name="intuitivepkg").order_by("age")
packages = annotate_with_age(Package.objects.filter(name="intuitivepkg")).order_by("age")

# Expected order: 1g.fc33 > 1.fc33 > 1e.fc33
# 'e' comes before numeric in comparison, 'g' comes after numeric
Expand Down Expand Up @@ -467,7 +468,7 @@ def test_age_with_non_alphanumeric_equivalence_versions(self):
checksum_type="sha256",
)

packages = Package.objects.with_age().filter(name="alphapkg").order_by("age")
packages = annotate_with_age(Package.objects.filter(name="alphapkg")).order_by("age")

# Expected behavior: 4.999.9 > 4.999 > 4.0 == 4_0 == 4+0
# Non-alphanumeric characters are treated as equivalent separators
Expand All @@ -484,7 +485,6 @@ def test_age_with_non_alphanumeric_equivalence_versions(self):
self.assertIn("4_0", remaining_versions)
self.assertIn("4+0", remaining_versions)

@skip("Non-ASCII character handling do not work correctly with current implementation")
def test_age_with_non_ascii_character_versions(self):
"""Test age calculation with non-ASCII character handling."""
# Create packages with non-ASCII characters
Expand Down Expand Up @@ -528,7 +528,7 @@ def test_age_with_non_ascii_character_versions(self):
checksum_type="sha256",
)

packages = Package.objects.with_age().filter(name="asciipkg").order_by("age")
packages = annotate_with_age(Package.objects.filter(name="asciipkg")).order_by("age")

# Expected behavior: 1.11 > 1.1.1 == 1.1.Á.1 > 1.1Á1
# Non-ASCII chars are ignored unless they break up alphanumeric sequences
Expand Down Expand Up @@ -586,7 +586,7 @@ def test_age_with_mixed_scenarios(self):
checksum_type="sha256",
)

packages = Package.objects.with_age().filter(name="mixedpkg").order_by("age")
packages = annotate_with_age(Package.objects.filter(name="mixedpkg")).order_by("age")

# Expected order: 1:1.0 (epoch wins) > 2.0.0 > 2.0 > 2.0.rc1
self.assertEqual(packages[0].epoch, "1")
Expand Down Expand Up @@ -634,11 +634,9 @@ def test_age_with_filtered_queryset(self):
)

# Test with filtered queryset (only older versions)
filtered_packages = (
Package.objects.with_age()
.filter(name="filterpkg", version__in=["1.0", "2.0"])
.order_by("age")
)
filtered_packages = annotate_with_age(
Package.objects.filter(name="filterpkg", version__in=["1.0", "2.0"])
).order_by("age")

self.assertEqual(filtered_packages.count(), 2)
# In the filtered set, 2.0 should be newest (age=1), 1.0 should be oldest (age=2)
Expand All @@ -649,7 +647,7 @@ def test_age_with_filtered_queryset(self):

def test_age_with_empty_queryset(self):
"""Test age calculation with empty queryset."""
empty_packages = Package.objects.with_age().filter(name="nonexistent")
empty_packages = annotate_with_age(Package.objects.filter(name="nonexistent"))
self.assertEqual(len(empty_packages.all()), 0)

def test_age_with_mixed_filtering_scenarios(self):
Expand Down Expand Up @@ -679,11 +677,9 @@ def test_age_with_mixed_filtering_scenarios(self):
)

# Test 1: Filter to get partial group from first package set
partial_group = (
Package.objects.with_age()
.filter(name="grouppkg", version__in=["2.0", "3.0", "4.0"])
.order_by("age")
)
partial_group = annotate_with_age(
Package.objects.filter(name="grouppkg", version__in=["2.0", "3.0", "4.0"])
).order_by("age")

self.assertEqual(partial_group.count(), 3)
# In this filtered set: 4.0 (age=1), 3.0 (age=2), 2.0 (age=3)
Expand All @@ -695,8 +691,8 @@ def test_age_with_mixed_filtering_scenarios(self):
self.assertEqual(partial_group[2].age, 3)

# Test 2: Filter across multiple package groups
cross_group = Package.objects.with_age().filter(
name__in=["grouppkg", "otherpkg"], version="2.0"
cross_group = annotate_with_age(
Package.objects.filter(name__in=["grouppkg", "otherpkg"], version="2.0")
)

# Should have one package from each group, both with age=1 in their respective groups
Expand Down Expand Up @@ -728,7 +724,7 @@ def test_age_consistency_with_retention_scenario(self):
created_packages.append(pkg)

# Test: Get packages and apply age - this should match what retention logic does
all_packages = Package.objects.with_age().filter(name="retentiontestpkg")
all_packages = annotate_with_age(Package.objects.filter(name="retentiontestpkg"))

# Verify all packages have correct age values
for pkg in all_packages:
Expand Down