diff --git a/.github/tests/test_enforce_backport_fixes.py b/.github/tests/test_enforce_backport_fixes.py new file mode 100644 index 0000000..9117e9b --- /dev/null +++ b/.github/tests/test_enforce_backport_fixes.py @@ -0,0 +1,213 @@ +""" +Unit tests for enforce_backport_fixes_reference (RELENG-175) in jira_sync_modules. + +The function: + - only acts on backport/ labels (not backport/none or other labels), + - skips enforcement when the PR body links a valid issue, + - otherwise comments once (mentioning author + assignees) and removes every + backport label. + +Network is fully mocked: extract_jira_keys decides validity and _gh_api stands +in for every GitHub REST call. +""" + +import os +import sys +import importlib + +import pytest + +# jira_sync_modules lives in the top-level scripts/ dir (stdlib-only, safe to import). +_SCRIPTS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "scripts")) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +jsm = importlib.import_module("jira_sync_modules") + + +# --------------------------------------------------------------------------- +# BACKPORT_LABEL_RE +# --------------------------------------------------------------------------- + +class TestBackportLabelRegex: + @pytest.mark.parametrize("label", [ + "backport/2025.4", + "backport/2025.40", + "backport/manager-3.4", + ]) + def test_matches_release_labels(self, label): + assert jsm.BACKPORT_LABEL_RE.match(label) + + @pytest.mark.parametrize("label", [ + "backport/none", + "backport/", + "backport/2025", + "area/build", + "P1", + ]) + def test_rejects_non_release_labels(self, label): + assert not jsm.BACKPORT_LABEL_RE.match(label) + + +# --------------------------------------------------------------------------- +# enforce_backport_fixes_reference +# --------------------------------------------------------------------------- + +class _GhRecorder: + """Records _gh_api calls and returns canned responses by (method, url-substring).""" + + def __init__(self, pr_json="{}", comments_json="[]", labels_json="[]"): + self.calls = [] + self._pr_json = pr_json + self._comments_json = comments_json + self._labels_json = labels_json + + def __call__(self, method, url, gh_token, payload=None): + self.calls.append((method, url, payload)) + if method == "GET" and "/pulls/" in url: + return 200, self._pr_json + if method == "GET" and "/comments" in url: + return 200, self._comments_json + if method == "GET" and url.endswith("/labels"): + return 200, self._labels_json + if method == "POST" and "/comments" in url: + return 201, "{}" + if method == "DELETE": + return 200, "{}" + return 200, "{}" + + def posted_comments(self): + return [p["body"] for (m, u, p) in self.calls if m == "POST" and "/comments" in u] + + def deleted_labels(self): + return [u.rsplit("/", 1)[-1] for (m, u, p) in self.calls if m == "DELETE"] + + +class TestBodyLinksExistingIssue: + """_pr_body_links_existing_issue combines project validation with existence.""" + + def test_no_keys_is_invalid(self, monkeypatch): + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["__NO_KEYS_FOUND__"]) + assert jsm._pr_body_links_existing_issue("t", "b", "u:p") is False + + def test_one_existing_key_is_valid(self, monkeypatch): + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["A-1", "A-2"]) + # First key is 404, second exists -> valid (any existing issue is enough). + monkeypatch.setattr(jsm, "_jira_issue_exists", lambda key, auth: key == "A-2") + assert jsm._pr_body_links_existing_issue("t", "b", "u:p") is True + + def test_all_absent_is_invalid(self, monkeypatch): + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["A-1"]) + monkeypatch.setattr(jsm, "_jira_issue_exists", lambda key, auth: False) + assert jsm._pr_body_links_existing_issue("t", "b", "u:p") is False + + def test_unknown_existence_fails_open(self, monkeypatch): + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["A-1"]) + monkeypatch.setattr(jsm, "_jira_issue_exists", lambda key, auth: None) + assert jsm._pr_body_links_existing_issue("t", "b", "u:p") is True + + +def test_non_backport_label_is_a_noop(monkeypatch): + called = [] + monkeypatch.setattr(jsm, "_gh_api", lambda *a, **k: called.append(a) or (200, "{}")) + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: pytest.fail("should not validate")) + + result = jsm.enforce_backport_fixes_reference( + "title", "body", 1, "area/build", "scylladb/scylladb", "tok", "user:pass" + ) + assert result is False + assert called == [] # no GitHub calls at all + + +def test_valid_existing_reference_allows_backport(monkeypatch): + rec = _GhRecorder() + monkeypatch.setattr(jsm, "_gh_api", rec) + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["SCYLLADB-123"]) + monkeypatch.setattr(jsm, "_jira_issue_exists", lambda key, auth: True) + + result = jsm.enforce_backport_fixes_reference( + "title", "Fixes: SCYLLADB-123", 7, "backport/2025.4", + "scylladb/scylladb", "tok", "user:pass", + ) + assert result is False + assert rec.calls == [] # validated via Jira, no comment / no label removal + + +def test_bogus_issue_number_is_enforced(monkeypatch): + """A real project with a non-existent issue number (404) must be rejected.""" + rec = _GhRecorder( + pr_json='{"user": {"login": "alice"}, "assignees": []}', + labels_json='[{"name": "backport/2025.4"}]', + ) + monkeypatch.setattr(jsm, "_gh_api", rec) + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["SCYLLADB-9898758758"]) + monkeypatch.setattr(jsm, "_jira_issue_exists", lambda key, auth: False) + + result = jsm.enforce_backport_fixes_reference( + "title", "Fixes: SCYLLADB-9898758758", 7, "backport/2025.4", + "scylladb/scylladb", "tok", "user:pass", + ) + assert result is True + assert len(rec.posted_comments()) == 1 + assert "backport%2F2025.4" in rec.deleted_labels() + + +def test_existence_unknown_fails_open(monkeypatch): + """If Jira can't confirm existence (None), fail open and allow the backport.""" + rec = _GhRecorder() + monkeypatch.setattr(jsm, "_gh_api", rec) + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["SCYLLADB-123"]) + monkeypatch.setattr(jsm, "_jira_issue_exists", lambda key, auth: None) + + result = jsm.enforce_backport_fixes_reference( + "title", "Fixes: SCYLLADB-123", 7, "backport/2025.4", + "scylladb/scylladb", "tok", "user:pass", + ) + assert result is False + assert rec.calls == [] # fail-open: no enforcement action taken + + +def test_missing_reference_comments_and_removes_labels(monkeypatch): + rec = _GhRecorder( + pr_json='{"user": {"login": "alice"}, "assignees": [{"login": "bob"}]}', + comments_json="[]", + labels_json='[{"name": "backport/2025.4"}, {"name": "backport/2025.3"}, {"name": "P1"}]', + ) + monkeypatch.setattr(jsm, "_gh_api", rec) + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["__NO_KEYS_FOUND__"]) + + result = jsm.enforce_backport_fixes_reference( + "title", "no reference here", 7, "backport/2025.4", + "scylladb/scylladb", "tok", "user:pass", + ) + assert result is True + + comments = rec.posted_comments() + assert len(comments) == 1 + assert "@alice" in comments[0] + assert "@bob" in comments[0] + assert jsm.REQUIRED_FIXES_COMMENT in comments[0] + + # Both backport labels removed (URL-encoded); the P1 label is left alone. + deleted = rec.deleted_labels() + assert "backport%2F2025.4" in deleted + assert "backport%2F2025.3" in deleted + assert all("P1" not in d for d in deleted) + + +def test_does_not_comment_twice(monkeypatch): + rec = _GhRecorder( + pr_json='{"user": {"login": "alice"}, "assignees": []}', + comments_json='[{"body": "%s"}]' % jsm.REQUIRED_FIXES_COMMENT.replace("\n", "\\n"), + labels_json='[{"name": "backport/2025.4"}]', + ) + monkeypatch.setattr(jsm, "_gh_api", rec) + monkeypatch.setattr(jsm, "extract_jira_keys", lambda *a, **k: ["__NO_KEYS_FOUND__"]) + + result = jsm.enforce_backport_fixes_reference( + "title", "no reference", 7, "backport/2025.4", + "scylladb/scylladb", "tok", "user:pass", + ) + assert result is True + assert rec.posted_comments() == [] # comment already present; not re-posted + assert "backport%2F2025.4" in rec.deleted_labels() # label still removed diff --git a/scripts/jira_sync_logic.py b/scripts/jira_sync_logic.py index bd4b382..92dea7b 100644 --- a/scripts/jira_sync_logic.py +++ b/scripts/jira_sync_logic.py @@ -23,6 +23,8 @@ add_comment_to_jira, remove_label_from_jira_issue, get_done_issue_keys, + enforce_backport_fixes_reference, + BACKPORT_LABEL_RE, ) # Sentinel value returned by extract_jira_keys when no keys are found. @@ -71,6 +73,18 @@ def manage_labeled_gh_event( print(f" pr_number = {pr_number!r}") print(f" triggering_label = {triggering_label!r}") print(f" owner_repo = {owner_repo!r}") + + # --- Enforce a valid Fixes: reference on backport labels --- + # backport/ labels are excluded from Jira sync (below), but when one + # is added we first require the PR body to link a valid issue. If it does not, + # this comments and strips the backport label(s); either way no Jira sync runs. + if BACKPORT_LABEL_RE.match(triggering_label): + print(f"Backport label '{triggering_label}' added: requiring a valid Fixes: reference in the PR body.") + enforce_backport_fixes_reference( + pr_title, pr_body, pr_number, triggering_label, + owner_repo, gh_token, jira_auth, + ) + return # --- Early exit: excluded labels --- if _is_excluded_label(triggering_label): diff --git a/scripts/jira_sync_modules.py b/scripts/jira_sync_modules.py index a2b9c6f..d53d008 100644 --- a/scripts/jira_sync_modules.py +++ b/scripts/jira_sync_modules.py @@ -28,6 +28,7 @@ import sys import time from datetime import date +from urllib.parse import quote from urllib.request import Request, urlopen from urllib.error import URLError, HTTPError @@ -59,6 +60,18 @@ # Priority names recognised as Jira priority values _PRIORITY_NAMES = {"P0", "P1", "P2", "P3", "P4"} +# Backport labels look like backport/2025.4 or backport/manager-3.4. +# backport/none (the opt-out label) deliberately does not match, so it is never +# enforced. +BACKPORT_LABEL_RE = re.compile(r"^backport/(?:manager-)?\d+\.\d+$") + +# Message posted (verbatim, per RELENG-175) when a backport label is added to a +# PR whose body does not link a valid issue. +REQUIRED_FIXES_COMMENT = ( + "PR body do not contain a Fixes reference to an issue and can not be backported\n" + "please update PR body with a valid ref to an issue and add the backport label again." +) + def _sanitize(text: str) -> str: """Remove carriage returns and backticks (matches the shell workflow).""" @@ -1281,3 +1294,181 @@ def get_done_issue_keys(details_csv: str) -> set[str]: done_keys.add(key) return done_keys + + +# --------------------------------------------------------------------------- +# enforce_backport_fixes_reference (RELENG-175) +# --------------------------------------------------------------------------- + + +def _jira_issue_exists(issue_key: str, jira_auth: str) -> bool | None: + """Return whether a Jira issue exists. + + True - the issue exists (HTTP 200). + False - the issue does not exist (HTTP 404). + None - existence could not be determined (no credentials, or a transient + error); callers should fail open. + """ + if not jira_auth: + return None + url = f"{JIRA_BASE_URL}/rest/api/3/issue/{issue_key}?fields=key" + encoded = base64.b64encode(jira_auth.encode()).decode() + req = Request(url) + req.add_header("Accept", "application/json") + req.add_header("Authorization", f"Basic {encoded}") + try: + with urlopen(req) as resp: + return resp.getcode() == 200 + except HTTPError as exc: + if exc.code == 404: + return False + print(f"Warning: Jira issue lookup for {issue_key} failed: {exc}") + return None + except URLError as exc: + print(f"Warning: Jira issue lookup for {issue_key} failed: {exc}") + return None + + +def _pr_body_links_existing_issue(pr_title: str, pr_body: str, jira_auth: str) -> bool: + """True if the PR body links a closing-keyword reference to an existing issue. + + Builds on extract_jira_keys (closing-keyword detection + project-prefix + validation) and additionally confirms the referenced issue actually exists, + so a valid project with a bogus issue number (e.g. SCYLLADB-9898758758) is + rejected, not just an unknown project (e.g. AAA-123). If existence can not be + confirmed for any candidate key (Jira unreachable), fail open and treat the + reference as valid so a real PR is not penalised for a transient outage. + """ + keys = extract_jira_keys(pr_title, pr_body, jira_auth) + if keys == ["__NO_KEYS_FOUND__"]: + return False + statuses = [_jira_issue_exists(key, jira_auth) for key in keys] + if any(status is True for status in statuses): + return True + if any(status is None for status in statuses): + # Existence could not be determined (e.g. Jira unreachable or a non-404 + # error from the API). Fail open so a transient outage never blocks a PR + # whose reference may well be valid. + return True + return False # every candidate key was definitively absent (404) + + +def _build_pr_mentions(pr: dict) -> str: + """Build a '@author @assignee...' prefix, de-duplicated and order-preserving. + + The author comes first, then assignees; dict.fromkeys removes duplicates + while keeping first-seen order (the author is frequently also an assignee). + """ + logins: list[str] = [] + author = (pr.get("user") or {}).get("login") + if author: + logins.append(author) + for assignee in pr.get("assignees") or []: + login = assignee.get("login") + if login: + logins.append(login) + return " ".join(f"@{login}" for login in dict.fromkeys(logins)) + + +def _enforcement_comment_present(owner_repo: str, pr_number: int, gh_token: str) -> bool: + """Return True if the RELENG-175 enforcement comment is already on the PR. + + Adding several backport labels at once fires one 'labeled' event each, so the + comment must be posted only once. A freshly-labeled PR has very few comments, + so the first 100 are enough to spot a prior enforcement comment. + """ + code, body = _gh_api( + "GET", + f"https://api.github.com/repos/{owner_repo}/issues/{pr_number}/comments?per_page=100", + gh_token, + ) + if code != 200: + print(f"Warning: could not read comments on PR #{pr_number} (HTTP {code})") + return False + return any(REQUIRED_FIXES_COMMENT in (c.get("body") or "") for c in json.loads(body)) + + +def _remove_backport_labels(owner_repo: str, pr_number: int, gh_token: str) -> None: + """Remove every backport/ label currently on the PR.""" + issue_api = f"https://api.github.com/repos/{owner_repo}/issues/{pr_number}" + code, body = _gh_api("GET", f"{issue_api}/labels", gh_token) + if code != 200: + print(f"Warning: could not list labels on PR #{pr_number} (HTTP {code})") + return + for label in json.loads(body): + name = label.get("name", "") + if not BACKPORT_LABEL_RE.match(name): + continue + del_code, del_body = _gh_api( + "DELETE", f"{issue_api}/labels/{quote(name, safe='')}", gh_token + ) + if del_code in (200, 204): + print(f"Removed backport label '{name}' from PR #{pr_number}") + else: + print(f"Warning: failed to remove '{name}' (HTTP {del_code}): {del_body[:200]}") + + +def enforce_backport_fixes_reference( + pr_title: str, + pr_body: str, + pr_number: int, + triggering_label: str, + owner_repo: str, + gh_token: str, + jira_auth: str, +) -> bool: + """Enforce a valid 'Fixes:' issue reference when a backport label is added (RELENG-175). + + When a ``backport/`` label is added to a PR whose body does not link + a valid issue, comment once - mentioning the author and assignee(s) - and + remove every backport label, so the PR can not be backported until its body + is fixed and the label re-added. + + "Valid" reuses extract_jira_keys (closing-keyword reference to a Jira key + whose project prefix is a real ScyllaDB project) and additionally confirms + the referenced issue actually exists, so both an unknown project (AAA-123) + and a bogus issue number in a real project (SCYLLADB-9898758758) are + rejected. backport/none does not match BACKPORT_LABEL_RE and is therefore + never enforced. + + Returns True when an enforcement action was taken (comment and/or label + removal), False otherwise. + """ + if not BACKPORT_LABEL_RE.match(triggering_label): + print(f"Label '{triggering_label}' is not a backport/ label; nothing to enforce.") + return False + + # Body-only scan: the requirement is about the PR body linking an issue, so + # commit messages are intentionally not scanned here. + if _pr_body_links_existing_issue(pr_title, pr_body, jira_auth): + print(f"PR #{pr_number} links a valid, existing issue; backport label '{triggering_label}' is allowed.") + return False + + print(f"PR #{pr_number} body has no valid Fixes: reference; commenting and removing the backport label.") + + code, body = _gh_api( + "GET", f"https://api.github.com/repos/{owner_repo}/pulls/{pr_number}", gh_token + ) + pr = json.loads(body) if code == 200 else {} + if code != 200: + print(f"Warning: could not fetch PR #{pr_number} (HTTP {code}); commenting without mentions.") + + mentions = _build_pr_mentions(pr) + comment = f"{mentions}\n\n{REQUIRED_FIXES_COMMENT}" if mentions else REQUIRED_FIXES_COMMENT + + if _enforcement_comment_present(owner_repo, pr_number, gh_token): + print(f"Enforcement comment already present on PR #{pr_number}; not commenting again.") + else: + c_code, c_body = _gh_api( + "POST", + f"https://api.github.com/repos/{owner_repo}/issues/{pr_number}/comments", + gh_token, + {"body": comment}, + ) + if c_code in (200, 201): + print(f"Posted enforcement comment on PR #{pr_number}") + else: + print(f"Warning: failed to comment on PR #{pr_number} (HTTP {c_code}): {c_body[:200]}") + + _remove_backport_labels(owner_repo, pr_number, gh_token) + return True