From 6de1476cc6e2b384e125163cd93d236a935745cd Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Mon, 16 Mar 2026 15:15:47 -0400 Subject: [PATCH 1/2] Bug 2023624 - Some older repos have comments that have an empty comment body and a null user object. The ETL script needs to handle these better instead of crashing --- main.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 884655f..3c2af83 100755 --- a/main.py +++ b/main.py @@ -301,8 +301,15 @@ def extract_reviewers( reviewers = github_get(session, reviewers_url).json() - logger.info(f"Extracted {len(reviewers)} reviewers for PR #{pr_number}") - return reviewers + filtered = [r for r in reviewers if r.get("user") is not None] + skipped = len(reviewers) - len(filtered) + if skipped: + logger.info( + f"Skipped {skipped} reviewer(s) with null user for PR #{pr_number}" + ) + + logger.info(f"Extracted {len(filtered)} reviewers for PR #{pr_number}") + return filtered def extract_comments( @@ -329,8 +336,16 @@ def extract_comments( logger.info(f"Comments URL: {comments_url}") comments = github_get(session, comments_url).json() - logger.info(f"Extracted {len(comments)} comments for PR #{pr_number}") - return comments + + filtered = [c for c in comments if c.get("user") is not None and c.get("body")] + skipped = len(comments) - len(filtered) + if skipped: + logger.info( + f"Skipped {skipped} comment(s) with null user or empty body for PR #{pr_number}" + ) + + logger.info(f"Extracted {len(filtered)} comments for PR #{pr_number}") + return filtered def sleep_for_rate_limit(resp: requests.Response) -> None: @@ -459,7 +474,7 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: "target_repository": repo, "date_reviewed": review.get("submitted_at"), "reviewer_email": None, # TODO Placeholder for reviewer email extraction logic - "reviewer_username": review.get("user", {}).get("login", "None"), + "reviewer_username": (review.get("user") or 
{}).get("login"), "status": review.get("state"), } transformed_data["reviewers"].append(transformed_reviewer) From 93e248666dabb4c019e23b6f06abaeef9d06e191 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Tue, 17 Mar 2026 10:40:08 -0400 Subject: [PATCH 2/2] Black formatted --- main.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/main.py b/main.py index 3c2af83..449e4ad 100755 --- a/main.py +++ b/main.py @@ -304,9 +304,7 @@ def extract_reviewers( filtered = [r for r in reviewers if r.get("user") is not None] skipped = len(reviewers) - len(filtered) if skipped: - logger.info( - f"Skipped {skipped} reviewer(s) with null user for PR #{pr_number}" - ) + logger.info(f"Skipped {skipped} reviewer(s) with null user for PR #{pr_number}") logger.info(f"Extracted {len(filtered)} reviewers for PR #{pr_number}") return filtered