Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/+vuln-report.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a `vulnerability_report` action to `RpmRepositoryVersionViewSet` that dispatches a task to scan all RPM packages in a repository version for known CVEs via osv.dev. Repositories are opted in through the new `osv_config` repository field, which is stored in the `osv.rpm.config` label as a JSON list of ecosystem entries, each with a `name` (e.g., `Red Hat`); `Red Hat` entries must also provide a `cpes` list that narrows the query to specific CPEs.
13 changes: 13 additions & 0 deletions pulp_rpm/app/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,19 @@
PACKAGES_DIRECTORY = "Packages"
DIST_TREE_MAIN_REPO_PATH = "."

# Name of the repository label that stores the OSV scanning configuration.
LABEL_OSV_CONFIG = "osv.rpm.config"

# Ecosystem names accepted in an OSV configuration entry's "name" field.
SUPPORTED_ECOSYSTEMS = {
    "AlmaLinux", "Azure Linux", "Mageia",
    "openEuler", "openSUSE", "Photon OS",
    "Red Hat", "Rocky Linux", "SUSE",
}

# Mappings of the possible integer values of "sum_type" on Advisory packages to their user-facing
# string representation. Should mirror the createrepo_c source code:
# https://github.com/rpm-software-management/createrepo_c/blob/master/src/checksum.h#L43-L54
Expand Down
1 change: 0 additions & 1 deletion pulp_rpm/app/serializers/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,6 @@ class Meta(PackageSerializer.Meta):
ref_name = "RPMPackageUploadSerializer"

def validate(self, data):

uploaded_file = data.get("file")
artifact = data.get("artifact")
upload = data.get("upload")
Expand Down
40 changes: 40 additions & 0 deletions pulp_rpm/app/serializers/repository.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json
import urllib.parse
from gettext import gettext as _
from textwrap import dedent
from urllib.parse import urlparse
Expand Down Expand Up @@ -33,6 +35,7 @@
ALLOWED_PUBLISH_CHECKSUMS,
CHECKSUM_CHOICES,
COMPRESSION_CHOICES,
LABEL_OSV_CONFIG,
LAYOUT_CHOICES,
SKIP_TYPES,
SYNC_POLICY_CHOICES,
Expand All @@ -51,6 +54,19 @@
ALLOWED_CONTENT_CHECKSUMS = settings.ALLOWED_CONTENT_CHECKSUMS


class OsvConfigField(serializers.JSONField):
    """JSON field whose value is read from the ``osv.rpm.config`` label of the instance."""

    def get_attribute(self, instance):
        """Return the decoded label value, or ``None`` when it is absent or not valid JSON."""
        labels = dict(instance.pulp_labels)
        encoded = labels.get(LABEL_OSV_CONFIG)
        if encoded is None:
            return None
        try:
            # The label value is URL-quoted JSON, so undo the quoting before parsing.
            decoded = json.loads(urllib.parse.unquote(encoded))
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass, so this covers both.
            return None
        return decoded


@extend_schema_serializer(
deprecate_fields=[
"metadata_checksum_type",
Expand Down Expand Up @@ -178,6 +194,14 @@ class RpmRepositorySerializer(RepositorySerializer):
"A JSON document describing the config.repo file Pulp should generate for this repo"
),
)
# Exposes the osv.rpm.config label as a structured field. The help text spells out
# that 'cpes' is mandatory for 'Red Hat' entries, matching the per-ecosystem validation.
osv_config = OsvConfigField(
    required=False,
    allow_null=True,
    help_text=_(
        "OSV vulnerability scanning configuration. A list of ecosystem entries, each with a "
        "'name' field (e.g. 'Red Hat'). 'Red Hat' entries additionally require a 'cpes' list. "
        "Set to null to remove the configuration."
    ),
)

def to_representation(self, instance):
data = super().to_representation(instance)
Expand Down Expand Up @@ -207,6 +231,21 @@ def validate(self, data):
{"checksum_type": _(ALLOWED_PUBLISH_CHECKSUM_ERROR_MSG)}
)

if "osv_config" in data:
osv_config = data.pop("osv_config")
from pulp_rpm.app.vuln_report import (
OsvConfigSerializer, # noqa: avoid circular at module load
)

current_labels = dict(self.instance.pulp_labels) if self.instance else {}
labels = {**current_labels, **data.get("pulp_labels", {})}
if osv_config is None:
labels.pop(LABEL_OSV_CONFIG, None)
else:
OsvConfigSerializer(data={"config": osv_config}).is_valid(raise_exception=True)
labels[LABEL_OSV_CONFIG] = urllib.parse.quote(json.dumps(osv_config))
data["pulp_labels"] = labels

validated_data = super().validate(data)
return validated_data

Expand All @@ -226,6 +265,7 @@ class Meta:
"repo_config",
"compression_type",
"layout",
"osv_config",
)
model = RpmRepository

Expand Down
30 changes: 29 additions & 1 deletion pulp_rpm/app/viewsets/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
AsyncOperationResponseSerializer,
RepositoryAddRemoveContentSerializer,
)
from pulpcore.plugin.tasking import dispatch
from pulpcore.plugin.tasking import check_content, dispatch
from pulpcore.plugin.util import extract_pk
from pulpcore.plugin.viewsets import (
DistributionViewSet,
Expand Down Expand Up @@ -43,6 +43,7 @@
UlnRemoteSerializer,
)
from pulp_rpm.app.tasks.signing import signed_add_and_remove
from pulp_rpm.app.vuln_report import generate_vuln_report_payloads, parse_osv_labels


class RpmModifyRepositoryActionMixin(ModifyRepositoryActionMixin):
Expand Down Expand Up @@ -318,9 +319,36 @@ class RpmRepositoryVersionViewSet(RepositoryVersionViewSet):
"has_repository_model_or_domain_or_obj_perms:rpm.view_rpmrepository",
],
},
{
"action": ["vulnerability_report"],
"principal": "authenticated",
"effect": "allow",
"condition": "has_repository_model_or_domain_or_obj_perms:rpm.view_rpmrepository",
},
],
}

@extend_schema(
    description="Dispatch a task to scan all packages in this repository version for known CVEs via osv.dev.",
    responses={202: AsyncOperationResponseSerializer},
)
@action(detail=True, methods=["post"], serializer_class=None)
def vulnerability_report(self, request, repository_pk, **kwargs):
    """Validate the repository's OSV label and dispatch the vulnerability scan task.

    Returns an ``OperationPostponedResponse`` wrapping the dispatched task.
    """
    version = self.get_object()
    repository = version.repository

    # Called only for its validation side effect: it raises when the OSV label is
    # missing or malformed, so the request fails before any task is dispatched.
    parse_osv_labels(repository.pulp_labels)

    payload_func = generate_vuln_report_payloads
    func_path = f"{payload_func.__module__}.{payload_func.__name__}"
    task = dispatch(
        check_content,
        shared_resources=[repository],
        kwargs={"func": func_path, "args": [str(version.pk)]},
    )
    return OperationPostponedResponse(task, request)


class RpmRemoteViewSet(RemoteViewSet, RolesMixin):
"""
Expand Down
167 changes: 167 additions & 0 deletions pulp_rpm/app/vuln_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
from __future__ import annotations

import json
import re
import urllib.parse
from collections.abc import AsyncGenerator, Generator
from gettext import gettext as _
from typing import Any, TypedDict, cast

from asgiref.sync import sync_to_async
from rest_framework import serializers
from rest_framework.serializers import ValidationError as DRFValidationError

from pulpcore.plugin.models import RepositoryVersion # type: ignore[import-untyped]

from pulp_rpm.app.constants import LABEL_OSV_CONFIG, SUPPORTED_ECOSYSTEMS
from pulp_rpm.app.models import Package


class OsvPackage(TypedDict):
    """Package identifier of an OSV query: a package name within an ecosystem."""

    name: str
    ecosystem: str


class OsvQuery(TypedDict):
    """A single OSV query: a package identifier plus the version to look up."""

    version: str
    package: OsvPackage


class VulnReportPayload(OsvQuery):
    """OSV query plus the content unit and repository version it was built for.

    This is the format required by pulpcore's Vulnerability Report feature.
    """

    content: Any
    repo_version: RepositoryVersion


class OsvEcosystem(TypedDict):
    """One validated ecosystem entry of the OSV configuration."""

    name: str
    # Every validated field of the entry other than "name" (for example "cpes").
    extra_arguments: dict[str, Any]


# --- Serializers ---


class EcosystemConfigSerializer(serializers.Serializer):
    """Validate a single ecosystem entry that only needs a supported ``name``."""

    name = serializers.CharField()

    def validate_name(self, value: str) -> str:
        """Return ``value`` unchanged when it names a supported ecosystem.

        Raises:
            serializers.ValidationError: if ``value`` is not in ``SUPPORTED_ECOSYSTEMS``.
        """
        if value in SUPPORTED_ECOSYSTEMS:
            return value
        known = ", ".join(sorted(SUPPORTED_ECOSYSTEMS))
        message = _("Unsupported ecosystem '%s'. Supported: %s.") % (value, known)
        raise serializers.ValidationError(message)


class RedHatEcosystemConfigSerializer(EcosystemConfigSerializer):
    """Validate a 'Red Hat' ecosystem entry, which must carry a non-empty ``cpes`` list."""

    cpes = serializers.ListField(
        child=serializers.CharField(),
        required=True,
        # ``required=True`` alone still accepts ``[]``; an empty list would make the
        # query builder fall back to the unscoped ecosystem name, defeating the requirement.
        allow_empty=False,
        help_text=_("CPEs are required for Red Hat to scope the OSV query to a specific product."),
    )


# Ecosystem name -> serializer class used to validate entries for that ecosystem.
# Names without an entry here are validated with the base EcosystemConfigSerializer.
_ECOSYSTEM_SERIALIZERS: dict[str, type[EcosystemConfigSerializer]] = {
    "Red Hat": RedHatEcosystemConfigSerializer,
}


class OsvConfigSerializer(serializers.Serializer):
    """Deserializes the osv.rpm.config label value into a list of OsvEcosystem entries."""

    config = serializers.JSONField()

    def validate_config(self, value: Any) -> list[OsvEcosystem]:
        """Validate every entry of the configuration list.

        Args:
            value: The decoded JSON value of the configuration.

        Returns:
            One ``OsvEcosystem`` per entry; validated fields other than ``name`` are
            collected under ``extra_arguments``.

        Raises:
            serializers.ValidationError: if ``value`` is not a non-empty list, an entry
                is not an object with a ``name`` key, or an entry fails the validation
                of its ecosystem-specific serializer.
        """
        if not isinstance(value, list) or not value:
            raise serializers.ValidationError(_("Must be a non-empty JSON list."))

        result: list[OsvEcosystem] = []
        for i, item in enumerate(value):
            if not isinstance(item, dict) or "name" not in item:
                raise serializers.ValidationError(
                    _("Item %d: each entry must be an object with a 'name' field.") % i
                )
            name = item["name"]
            # JSON permits a list or object as "name"; those are unhashable and would make
            # the dict lookup raise TypeError. Route them to the base serializer so they
            # are rejected with a regular validation error instead.
            if isinstance(name, str):
                serializer_class = _ECOSYSTEM_SERIALIZERS.get(name, EcosystemConfigSerializer)
            else:
                serializer_class = EcosystemConfigSerializer
            s = serializer_class(data=item)
            if not s.is_valid():
                raise serializers.ValidationError({f"item[{i}]": s.errors})
            data = cast(dict[str, Any], s.validated_data)
            extra = {k: v for k, v in data.items() if k != "name"}
            result.append(OsvEcosystem(name=data["name"], extra_arguments=extra))
        return result


# --- Helpers ---


# Matches the leading "cpe:/o:redhat" or "cpe:/a:redhat" of a CPE string.
_CPE_PREFIX = re.compile(r"^cpe:/[oa]:redhat")


def parse_osv_labels(labels: dict[str, str]) -> list[OsvEcosystem]:
    """Parse and validate the osv.rpm.config label.

    The label value must be a JSON list, optionally URL-encoded to satisfy Pulp's label
    constraint that forbids commas and parentheses in label values.

    Args:
        labels: The repository's labels.

    Returns:
        The validated list of OsvEcosystem entries.

    Raises:
        DRFValidationError: if the label is absent, is not valid JSON, or fails
            validation by ``OsvConfigSerializer``.
    """
    raw = labels.get(LABEL_OSV_CONFIG)
    if raw is None:
        raise DRFValidationError(
            {LABEL_OSV_CONFIG: _("Required label '%s' is missing.") % LABEL_OSV_CONFIG}
        )
    try:
        parsed = json.loads(urllib.parse.unquote(raw))
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError subclass. Chain the cause so the parse
        # error stays visible in tracebacks.
        raise DRFValidationError({LABEL_OSV_CONFIG: _("Must be a valid JSON list.")}) from exc
    s = OsvConfigSerializer(data={"config": parsed})
    if not s.is_valid():
        raise DRFValidationError({LABEL_OSV_CONFIG: s.errors})
    return s.validated_data["config"]


def build_osv_queries(
    name: str, version: str, ecosystems: list[OsvEcosystem]
) -> Generator[OsvQuery, None, None]:
    """Yield the OSV queries for one package across the configured ecosystems.

    An entry with a non-empty ``cpes`` extra argument yields one query per CPE, whose
    ecosystem string is the CPE with its ``_CPE_PREFIX`` match replaced by the entry's
    name. Any other entry yields a single query using the entry's name as the ecosystem.
    """
    for entry in ecosystems:
        eco_name = entry["name"]
        cpes: list[str] = entry["extra_arguments"].get("cpes", [])
        if not cpes:
            yield OsvQuery(
                version=version,
                package=OsvPackage(name=name, ecosystem=eco_name),
            )
            continue
        for cpe in cpes:
            scoped_ecosystem = _CPE_PREFIX.sub(eco_name, cpe)
            yield OsvQuery(
                version=version,
                package=OsvPackage(name=name, ecosystem=scoped_ecosystem),
            )


async def generate_vuln_report_payloads(
    repository_version_pk: str,
) -> AsyncGenerator[VulnReportPayload, None]:
    """Yield a payload for every OSV query of every rpm.package in a repository version.

    The ecosystems come from the OSV label of the version's repository;
    ``parse_osv_labels`` raises if that label is missing or malformed.
    """
    repo_version: RepositoryVersion = await RepositoryVersion.objects.aget(pk=repository_version_pk)

    def _fetch_repository() -> Any:
        return repo_version.repository

    # The repository and its labels are read through sync_to_async, as before;
    # the related-object access may query the database.
    repo: Any = await sync_to_async(_fetch_repository)()

    def _copy_labels() -> dict[str, str]:
        return dict(repo.pulp_labels)

    labels: dict[str, str] = await sync_to_async(_copy_labels)()
    ecosystems = parse_osv_labels(labels)

    rpm_contents = repo_version.content.filter(pulp_type="rpm.package")
    async for content in rpm_contents.aiterator():
        # Only name and version are needed to build the queries.
        pkg = await Package.objects.only("name", "version").aget(pk=content.pk)
        pkg_name, pkg_version = str(pkg.name), str(pkg.version)
        for query in build_osv_queries(pkg_name, pkg_version, ecosystems):
            yield VulnReportPayload(
                version=query["version"],
                package=query["package"],
                content=content,
                repo_version=repo_version,
            )
11 changes: 8 additions & 3 deletions pulp_rpm/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,21 @@ def rpm_repository_api(rpm_client):


@pytest.fixture(scope="class")
def rpm_repository_factory(rpm_repository_api, rpm_package_api, gen_object_with_cleanup):
    """A factory to generate an RPM Repository with auto-deletion after the test run."""

    def _rpm_repository_factory(pulp_domain=None, upload_packages=None, monitor_task=None, **body):
        """Create a repository and optionally upload packages into it.

        Args:
            pulp_domain: Optional domain passed through to the create call.
            upload_packages: Optional iterable of package file paths to upload into the
                new repository.
            monitor_task: Callable used to wait for each upload task; required whenever
                ``upload_packages`` is given.
            **body: Extra fields for the repository create request.

        Returns:
            The created repository, re-read after any uploads.

        Raises:
            ValueError: if ``upload_packages`` is given without ``monitor_task``.
        """
        # Checked before anything is created, so a missing callable fails with a clear
        # message instead of "'NoneType' object is not callable" halfway through.
        if upload_packages and monitor_task is None:
            raise ValueError("'monitor_task' is required when 'upload_packages' is given.")

        data = {"name": str(uuid.uuid4())}
        data.update(body)
        kwargs = {}
        if pulp_domain:
            kwargs["pulp_domain"] = pulp_domain
        repo = gen_object_with_cleanup(rpm_repository_api, data, **kwargs)
        if upload_packages:
            for path in upload_packages:
                monitor_task(rpm_package_api.create(file=str(path), repository=repo.pulp_href).task)
            # Re-read so the returned object reflects the state after the uploads.
            repo = rpm_repository_api.read(repo.pulp_href)
        return repo

    return _rpm_repository_factory

Expand Down
Loading
Loading