diff --git a/CHANGES/+vuln-report.feature b/CHANGES/+vuln-report.feature new file mode 100644 index 000000000..d740bd9e4 --- /dev/null +++ b/CHANGES/+vuln-report.feature @@ -0,0 +1 @@ +Added a `vulnerability_report` action to `RpmRepositoryVersionViewSet` that scans all RPM packages in a repository version for known CVEs via osv.dev. Repositories are opted in by setting the `osv.rpm.config` label (also exposed as the `osv_config` repository field) to a JSON list of ecosystem entries, each with a `name` (e.g., `Red Hat`); `Red Hat` entries must additionally provide a `cpes` list, which narrows the query to specific CPEs. \ No newline at end of file diff --git a/pulp_rpm/app/constants.py b/pulp_rpm/app/constants.py index 01dabfeb4..79d3d6d47 100644 --- a/pulp_rpm/app/constants.py +++ b/pulp_rpm/app/constants.py @@ -288,6 +288,19 @@ PACKAGES_DIRECTORY = "Packages" DIST_TREE_MAIN_REPO_PATH = "." +LABEL_OSV_CONFIG = "osv.rpm.config" +SUPPORTED_ECOSYSTEMS = { + "AlmaLinux", + "Azure Linux", + "Mageia", + "openEuler", + "openSUSE", + "Photon OS", + "Red Hat", + "Rocky Linux", + "SUSE", +} + # Mappings of the possible integer values of "sum_type" on Advisory packages to their user-facing # string representation.
Should mirror the createrepo_c source code: # https://github.com/rpm-software-management/createrepo_c/blob/master/src/checksum.h#L43-L54 diff --git a/pulp_rpm/app/serializers/package.py b/pulp_rpm/app/serializers/package.py index d3de956d8..ba3b52021 100644 --- a/pulp_rpm/app/serializers/package.py +++ b/pulp_rpm/app/serializers/package.py @@ -420,7 +420,6 @@ class Meta(PackageSerializer.Meta): ref_name = "RPMPackageUploadSerializer" def validate(self, data): - uploaded_file = data.get("file") artifact = data.get("artifact") upload = data.get("upload") diff --git a/pulp_rpm/app/serializers/repository.py b/pulp_rpm/app/serializers/repository.py index f7bc3cd3a..f3ecb37c7 100644 --- a/pulp_rpm/app/serializers/repository.py +++ b/pulp_rpm/app/serializers/repository.py @@ -1,3 +1,5 @@ +import json +import urllib.parse from gettext import gettext as _ from textwrap import dedent from urllib.parse import urlparse @@ -33,6 +35,7 @@ ALLOWED_PUBLISH_CHECKSUMS, CHECKSUM_CHOICES, COMPRESSION_CHOICES, + LABEL_OSV_CONFIG, LAYOUT_CHOICES, SKIP_TYPES, SYNC_POLICY_CHOICES, @@ -51,6 +54,19 @@ ALLOWED_CONTENT_CHECKSUMS = settings.ALLOWED_CONTENT_CHECKSUMS +class OsvConfigField(serializers.JSONField): + """JSONField backed by the osv.rpm.config label; reads from labels on the instance.""" + + def get_attribute(self, instance): + raw = dict(instance.pulp_labels).get(LABEL_OSV_CONFIG) + if raw is None: + return None + try: + return json.loads(urllib.parse.unquote(raw)) + except (json.JSONDecodeError, ValueError): + return None + + @extend_schema_serializer( deprecate_fields=[ "metadata_checksum_type", @@ -178,6 +194,14 @@ class RpmRepositorySerializer(RepositorySerializer): "A JSON document describing the config.repo file Pulp should generate for this repo" ), ) + osv_config = OsvConfigField( + required=False, + allow_null=True, + help_text=_( + "OSV vulnerability scanning configuration. A list of ecosystem entries, each with a " + "'name' field (e.g. 
'Red Hat') and optional 'cpes' list." + ), + ) def to_representation(self, instance): data = super().to_representation(instance) @@ -207,6 +231,21 @@ def validate(self, data): {"checksum_type": _(ALLOWED_PUBLISH_CHECKSUM_ERROR_MSG)} ) + if "osv_config" in data: + osv_config = data.pop("osv_config") + from pulp_rpm.app.vuln_report import ( + OsvConfigSerializer, # noqa: avoid circular at module load + ) + + current_labels = dict(self.instance.pulp_labels) if self.instance else {} + labels = {**current_labels, **data.get("pulp_labels", {})} + if osv_config is None: + labels.pop(LABEL_OSV_CONFIG, None) + else: + OsvConfigSerializer(data={"config": osv_config}).is_valid(raise_exception=True) + labels[LABEL_OSV_CONFIG] = urllib.parse.quote(json.dumps(osv_config)) + data["pulp_labels"] = labels + validated_data = super().validate(data) return validated_data @@ -226,6 +265,7 @@ class Meta: "repo_config", "compression_type", "layout", + "osv_config", ) model = RpmRepository diff --git a/pulp_rpm/app/viewsets/repository.py b/pulp_rpm/app/viewsets/repository.py index 1f1e65847..74ecf4275 100644 --- a/pulp_rpm/app/viewsets/repository.py +++ b/pulp_rpm/app/viewsets/repository.py @@ -11,7 +11,7 @@ AsyncOperationResponseSerializer, RepositoryAddRemoveContentSerializer, ) -from pulpcore.plugin.tasking import dispatch +from pulpcore.plugin.tasking import check_content, dispatch from pulpcore.plugin.util import extract_pk from pulpcore.plugin.viewsets import ( DistributionViewSet, @@ -43,6 +43,7 @@ UlnRemoteSerializer, ) from pulp_rpm.app.tasks.signing import signed_add_and_remove +from pulp_rpm.app.vuln_report import generate_vuln_report_payloads, parse_osv_labels class RpmModifyRepositoryActionMixin(ModifyRepositoryActionMixin): @@ -318,9 +319,36 @@ class RpmRepositoryVersionViewSet(RepositoryVersionViewSet): "has_repository_model_or_domain_or_obj_perms:rpm.view_rpmrepository", ], }, + { + "action": ["vulnerability_report"], + "principal": "authenticated", + "effect": "allow", + 
"condition": "has_repository_model_or_domain_or_obj_perms:rpm.view_rpmrepository", + }, ], } + @extend_schema( + description="Dispatch a task to scan all packages in this repository version for known CVEs via osv.dev.", + responses={202: AsyncOperationResponseSerializer}, + ) + @action(detail=True, methods=["post"], serializer_class=None) + def vulnerability_report(self, request, repository_pk, **kwargs): + repository_version = self.get_object() + repo = repository_version.repository + + parse_osv_labels(repo.pulp_labels) + + async_result = dispatch( + check_content, + shared_resources=[repo], + kwargs={ + "func": f"{generate_vuln_report_payloads.__module__}.{generate_vuln_report_payloads.__name__}", + "args": [str(repository_version.pk)], + }, + ) + return OperationPostponedResponse(async_result, request) + class RpmRemoteViewSet(RemoteViewSet, RolesMixin): """ diff --git a/pulp_rpm/app/vuln_report.py b/pulp_rpm/app/vuln_report.py new file mode 100644 index 000000000..39902ff06 --- /dev/null +++ b/pulp_rpm/app/vuln_report.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +import json +import re +import urllib.parse +from collections.abc import AsyncGenerator, Generator +from gettext import gettext as _ +from typing import Any, TypedDict, cast + +from asgiref.sync import sync_to_async +from rest_framework import serializers +from rest_framework.serializers import ValidationError as DRFValidationError + +from pulpcore.plugin.models import RepositoryVersion # type: ignore[import-untyped] + +from pulp_rpm.app.constants import LABEL_OSV_CONFIG, SUPPORTED_ECOSYSTEMS +from pulp_rpm.app.models import Package + + +class OsvPackage(TypedDict): + name: str + ecosystem: str + + +class OsvQuery(TypedDict): + version: str + package: OsvPackage + + +class VulnReportPayload(OsvQuery): + """Format required by pulpcore's Vulnerability Report feature.""" + + content: Any + repo_version: RepositoryVersion + + +class OsvEcosystem(TypedDict): + name: str + extra_arguments: 
dict[str, Any] + + +# --- Serializers --- + + +class EcosystemConfigSerializer(serializers.Serializer): + name = serializers.CharField() + + def validate_name(self, value: str) -> str: + if value not in SUPPORTED_ECOSYSTEMS: + supported = ", ".join(sorted(SUPPORTED_ECOSYSTEMS)) + raise serializers.ValidationError( + _("Unsupported ecosystem '%s'. Supported: %s.") % (value, supported) + ) + return value + + +class RedHatEcosystemConfigSerializer(EcosystemConfigSerializer): + cpes = serializers.ListField( + child=serializers.CharField(), + required=True, + help_text=_("CPEs are required for Red Hat to scope the OSV query to a specific product."), + ) + + +_ECOSYSTEM_SERIALIZERS: dict[str, type[EcosystemConfigSerializer]] = { + "Red Hat": RedHatEcosystemConfigSerializer, +} + + +class OsvConfigSerializer(serializers.Serializer): + """Deserializes the osv.rpm.config label value into a list of OsvEcosystem entries.""" + + config = serializers.JSONField() + + def validate_config(self, value: Any) -> list[OsvEcosystem]: + if not isinstance(value, list) or not value: + raise serializers.ValidationError(_("Must be a non-empty JSON list.")) + + result: list[OsvEcosystem] = [] + for i, item in enumerate(value): + if not isinstance(item, dict) or "name" not in item: + raise serializers.ValidationError( + _("Item %d: each entry must be an object with a 'name' field.") % i + ) + name = item["name"] + serializer_class = _ECOSYSTEM_SERIALIZERS.get(name, EcosystemConfigSerializer) + s = serializer_class(data=item) + if not s.is_valid(): + raise serializers.ValidationError({f"item[{i}]": s.errors}) + data = cast(dict[str, Any], s.validated_data) + extra = {k: v for k, v in data.items() if k != "name"} + result.append(OsvEcosystem(name=data["name"], extra_arguments=extra)) + return result + + +# --- Helpers --- + + +_CPE_PREFIX = re.compile(r"^cpe:/[oa]:redhat") + + +def parse_osv_labels(labels: dict[str, str]) -> list[OsvEcosystem]: + """Parse and validate the osv.rpm.config label. 
+ + Returns a list of OsvEcosystem entries. Raises DRFValidationError if absent or malformed. + The label value must be a JSON list, optionally URL-encoded to satisfy Pulp's label + constraint that forbids commas and parentheses in label values. + """ + raw = labels.get(LABEL_OSV_CONFIG) + if raw is None: + raise DRFValidationError( + {LABEL_OSV_CONFIG: _("Required label '%s' is missing.") % LABEL_OSV_CONFIG} + ) + try: + parsed = json.loads(urllib.parse.unquote(raw)) + except (json.JSONDecodeError, ValueError): + raise DRFValidationError({LABEL_OSV_CONFIG: _("Must be a valid JSON list.")}) + s = OsvConfigSerializer(data={"config": parsed}) + if not s.is_valid(): + raise DRFValidationError({LABEL_OSV_CONFIG: s.errors}) + return s.validated_data["config"] + + +def build_osv_queries( + name: str, version: str, ecosystems: list[OsvEcosystem] +) -> Generator[OsvQuery, None, None]: + """Yield OSV query dicts for the given package and ecosystems. + + For Red Hat entries with CPEs, each CPE is converted to an ecosystem string. + For all other entries, the ecosystem name is used directly. 
+ """ + for ecosystem in ecosystems: + cpes: list[str] = ecosystem["extra_arguments"].get("cpes", []) + if cpes: + for cpe in cpes: + yield OsvQuery( + version=version, + package=OsvPackage( + name=name, ecosystem=_CPE_PREFIX.sub(ecosystem["name"], cpe) + ), + ) + else: + yield OsvQuery( + version=version, + package=OsvPackage(name=name, ecosystem=ecosystem["name"]), + ) + + +async def generate_vuln_report_payloads( + repository_version_pk: str, +) -> AsyncGenerator[VulnReportPayload, None]: + """Generator of OSV query dicts for rpm.packages in a repository version.""" + repo_version: RepositoryVersion = await RepositoryVersion.objects.aget(pk=repository_version_pk) + repo: Any = await sync_to_async(lambda: repo_version.repository)() + labels: dict[str, str] = await sync_to_async(lambda: dict(repo.pulp_labels))() + ecosystems = parse_osv_labels(labels) + + pkg_content = repo_version.content.filter(pulp_type="rpm.package") + async for content in pkg_content.aiterator(): + pkg = await Package.objects.only("name", "version").aget(pk=content.pk) + for osv_data in build_osv_queries(str(pkg.name), str(pkg.version), ecosystems): + yield VulnReportPayload( + version=osv_data["version"], + package=osv_data["package"], + content=content, + repo_version=repo_version, + ) diff --git a/pulp_rpm/pytest_plugin.py b/pulp_rpm/pytest_plugin.py index c2a8007de..4ad3278ef 100644 --- a/pulp_rpm/pytest_plugin.py +++ b/pulp_rpm/pytest_plugin.py @@ -69,16 +69,21 @@ def rpm_repository_api(rpm_client): @pytest.fixture(scope="class") -def rpm_repository_factory(rpm_repository_api, gen_object_with_cleanup): +def rpm_repository_factory(rpm_repository_api, rpm_package_api, gen_object_with_cleanup): """A factory to generate an RPM Repository with auto-deletion after the test run.""" - def _rpm_repository_factory(pulp_domain=None, **body): + def _rpm_repository_factory(pulp_domain=None, upload_packages=None, monitor_task=None, **body): data = {"name": str(uuid.uuid4())} data.update(body) kwargs = 
{} if pulp_domain: kwargs["pulp_domain"] = pulp_domain - return gen_object_with_cleanup(rpm_repository_api, data, **kwargs) + repo = gen_object_with_cleanup(rpm_repository_api, data, **kwargs) + if upload_packages: + for path in upload_packages: + monitor_task(rpm_package_api.create(file=str(path), repository=repo.pulp_href).task) + repo = rpm_repository_api.read(repo.pulp_href) + return repo return _rpm_repository_factory diff --git a/pulp_rpm/tests/functional/api/test_vuln_report.py b/pulp_rpm/tests/functional/api/test_vuln_report.py new file mode 100644 index 000000000..d1a3cbd13 --- /dev/null +++ b/pulp_rpm/tests/functional/api/test_vuln_report.py @@ -0,0 +1,110 @@ +"""Functional tests for the vulnerability_report action on RpmRepositoryVersionViewSet.""" + +import json +import urllib.parse + +import pytest + +from pulpcore.client.pulp_rpm.exceptions import ApiException + +from pulp_rpm.tests.functional.utils import Nevra + +EXPECTED_RHSA_IDS = [ + "RHSA-2014:0678", + "RHSA-2014:0786", + "RHSA-2014:0923", + "RHSA-2014:1023", + "RHSA-2014:1281", + "RHSA-2014:1724", + "RHSA-2014:1971", + "RHSA-2014:2010", +] + +LABEL_OSV_CONFIG = "osv.rpm.config" + + +def _osv_label(config: list) -> str: + """Serialize an OSV config list as a URL-encoded JSON string safe for Pulp labels.""" + return urllib.parse.quote(json.dumps(config)) + + +REDHAT_CPE_LABELS = { + LABEL_OSV_CONFIG: _osv_label( + [{"name": "Red Hat", "cpes": ["cpe:/o:redhat:enterprise_linux:7::workstation"]}] + ) +} + + +def test_vuln_report_redhat( + rpm_repository_factory, + rpm_create_package, + monitor_task, + rpm_repository_versions_api, + pulpcore_bindings, +): + """Known RHSA IDs appear in the report for a Red Hat config with CPEs.""" + kernel_nevra = Nevra(name="kernel", epoch=0, version="3.10.0", release="123.el7", arch="x86_64") + repo = rpm_repository_factory( + pulp_labels=REDHAT_CPE_LABELS, + upload_packages=[rpm_create_package(kernel_nevra)], + monitor_task=monitor_task, + ) + + resp = 
rpm_repository_versions_api.vulnerability_report(repo.latest_version_href) + monitor_task(resp.task) + + vulns_list = pulpcore_bindings.VulnReportApi.list() + assert len(vulns_list.results) > 0 + ids = {vuln["id"] for report in vulns_list.results for vuln in report.vulns} + assert set(EXPECTED_RHSA_IDS).issubset(ids) + + repo_version = rpm_repository_versions_api.read(repo.latest_version_href) + assert repo_version.vuln_report is not None + + +@pytest.mark.parametrize( + "labels,expected", + [ + pytest.param( + {}, + "Required label", + id="missing_config", + ), + pytest.param( + {LABEL_OSV_CONFIG: '[{"name": "NotAnEcosystem"}]'}, + "Unsupported ecosystem", + id="unsupported_ecosystem", + ), + pytest.param( + {LABEL_OSV_CONFIG: "not-valid-json["}, + "valid JSON", + id="malformed_config", + ), + pytest.param( + {LABEL_OSV_CONFIG: ""}, + "valid JSON", + id="empty_value", + ), + pytest.param( + {LABEL_OSV_CONFIG: " "}, + "valid JSON", + id="whitespace_only", + ), + pytest.param( + {LABEL_OSV_CONFIG: _osv_label([{"name": "Red Hat"}])}, + "cpes", + id="redhat_missing_cpes", + ), + ], +) +def test_vuln_report_invalid_config( + labels, expected, rpm_repository_factory, rpm_repository_versions_api +): + """Invalid osv.rpm.config label returns HTTP 400.""" + repo = rpm_repository_factory(pulp_labels=labels) + + with pytest.raises(ApiException) as exc: + rpm_repository_versions_api.vulnerability_report(repo.latest_version_href) + assert exc.value.status == 400 + assert LABEL_OSV_CONFIG in exc.value.body + assert expected in exc.value.body diff --git a/pulp_rpm/tests/functional/conftest.py b/pulp_rpm/tests/functional/conftest.py index 23237178e..4f60e5cff 100644 --- a/pulp_rpm/tests/functional/conftest.py +++ b/pulp_rpm/tests/functional/conftest.py @@ -3,6 +3,7 @@ import subprocess import uuid from dataclasses import dataclass +from pathlib import Path from tempfile import NamedTemporaryFile import gnupg @@ -21,6 +22,7 @@ ContentPackagelangpacksApi, ContentPackagesApi, 
RemotesUlnApi, + RepositoriesRpmVersionsApi, RpmCompsApi, RpmCopyApi, RpmRepositorySyncURL, @@ -35,8 +37,10 @@ RPM_SIGNED_URL, ) from pulp_rpm.tests.functional.utils import ( + Nevra, PackageListFetcher, RepositoryBuilder, + build_rpm, init_signed_repo_configuration, ) @@ -128,6 +132,11 @@ def rpm_copy_api(rpm_client): return RpmCopyApi(rpm_client) +@pytest.fixture(scope="session") +def rpm_repository_versions_api(rpm_client): + return RepositoriesRpmVersionsApi(rpm_client) + + @pytest.fixture def signed_artifact(pulpcore_bindings, tmp_path): data = requests.get(RPM_SIGNED_URL).content @@ -161,6 +170,18 @@ def _rpm_artifact_factory(url=RPM_SIGNED_URL, pulp_domain=None): return _rpm_artifact_factory +@pytest.fixture +def rpm_create_package(tmp_path): + """Return a factory that builds a minimal RPM file and returns its path.""" + + def _factory(nevra: Nevra) -> Path: + path = tmp_path / f"{nevra.to_nvra()}.rpm" + build_rpm(nevra, path) + return path + + return _factory + + @pytest.fixture def rpm_package_factory( gen_object_with_cleanup, diff --git a/pulp_rpm/tests/functional/utils.py b/pulp_rpm/tests/functional/utils.py index 36d48e79e..ff946629a 100644 --- a/pulp_rpm/tests/functional/utils.py +++ b/pulp_rpm/tests/functional/utils.py @@ -15,6 +15,7 @@ import createrepo_c as cr import pyzstd import requests +import rpm_rs from pulp_rpm.tests.functional.constants import ( LEGACY_SIGNING_KEY, @@ -113,7 +114,7 @@ def get_metadata_content_helper(base_url, repomd_elem, meta_type): class Nevra(NamedTuple): name: str - epoch: str + epoch: int version: str release: str arch: str @@ -138,7 +139,7 @@ class MetaPackage: def generate_nevra(cls, n: int) -> Nevra: return Nevra( name=f"pkg{n}-{SALT[:8]}", - epoch="0", + epoch=0, version=f"{n}.0", release=f"{n}", arch="noarch", @@ -149,6 +150,13 @@ def generate_digest(cls, n: int) -> str: return hashlib.sha256(f"digest-{SALT}-{n}".encode()).hexdigest() +def build_rpm(nevra: Nevra, path: Path) -> None: + """Build a minimal RPM file 
at path using rpm_rs.""" + builder = rpm_rs.PackageBuilder(nevra.name, nevra.version, "GPLv2", nevra.arch) + builder.release(nevra.release) + builder.build().write_file(path) + + def normalized_location(pkg: MetaPackage, prefix: bool = True) -> MetaPackage: """Return a copy of pkg with location set to the canonical NVRA filename.""" filename = f"{pkg.nevra.to_nvra()}.rpm" @@ -190,7 +198,7 @@ def from_pulp_repoversion(self, repoversion_href: str) -> PackageList: MetaPackage( nevra=Nevra( name=pkg.name, - epoch=pkg.epoch, + epoch=int(pkg.epoch), version=pkg.version, release=pkg.release, arch=pkg.arch, @@ -211,7 +219,7 @@ def _from_path(path: str) -> PackageList: MetaPackage( nevra=Nevra( name=p.name, - epoch=p.epoch, + epoch=int(p.epoch), version=p.version, release=p.release, arch=p.arch, @@ -240,7 +248,7 @@ def _from_http_url(base_url: str) -> PackageList: class RepositoryBuilder: - """Builds local RPM repositories from MetaPackage entries using createrepo_c.""" + """Builds a pseudo-remote RPM repository.""" def __init__(self, tmp_path: Path): self._tmp_path = tmp_path @@ -257,7 +265,7 @@ def build( cr_pkg = cr.Package() cr_pkg.name = pkg.nevra.name cr_pkg.arch = pkg.nevra.arch - cr_pkg.epoch = pkg.nevra.epoch + cr_pkg.epoch = str(pkg.nevra.epoch) cr_pkg.version = pkg.nevra.version cr_pkg.release = pkg.nevra.release cr_pkg.pkgId = pkg.digest diff --git a/pulp_rpm/tests/unit/test_serializers.py b/pulp_rpm/tests/unit/test_serializers.py new file mode 100644 index 000000000..1ec79d09c --- /dev/null +++ b/pulp_rpm/tests/unit/test_serializers.py @@ -0,0 +1,24 @@ +import json +import urllib.parse +from unittest.mock import MagicMock + +import pytest + +from pulp_rpm.app.serializers.repository import OsvConfigField + +_CONFIG = {"ecosystem": "rpm", "repo": "myrepo"} + + +@pytest.mark.parametrize( + "labels,expected", + [ + ({}, None), + ({"osv.rpm.config": urllib.parse.quote(json.dumps(_CONFIG))}, _CONFIG), + ({"osv.rpm.config": json.dumps(_CONFIG)}, _CONFIG), + 
({"osv.rpm.config": "not-json"}, None), + ], +) +def test_osv_config_field_get_attribute(labels, expected): + instance = MagicMock() + instance.pulp_labels = labels + assert OsvConfigField().get_attribute(instance) == expected