Skip to content

Commit 161dc95

Browse files
authored
Bug 2023624 - Some older repos have comments with an empty comment body and a null user object. The ETL script needs to handle these gracefully instead of crashing.
1 parent f56b754 commit 161dc95

1 file changed

Lines changed: 18 additions & 5 deletions

File tree

main.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,13 @@ def extract_reviewers(
301301

302302
reviewers = github_get(session, reviewers_url).json()
303303

304-
logger.info(f"Extracted {len(reviewers)} reviewers for PR #{pr_number}")
305-
return reviewers
304+
filtered = [r for r in reviewers if r.get("user") is not None]
305+
skipped = len(reviewers) - len(filtered)
306+
if skipped:
307+
logger.info(f"Skipped {skipped} reviewer(s) with null user for PR #{pr_number}")
308+
309+
logger.info(f"Extracted {len(filtered)} reviewers for PR #{pr_number}")
310+
return filtered
306311

307312

308313
def extract_comments(
@@ -329,8 +334,16 @@ def extract_comments(
329334
logger.info(f"Comments URL: {comments_url}")
330335

331336
comments = github_get(session, comments_url).json()
332-
logger.info(f"Extracted {len(comments)} comments for PR #{pr_number}")
333-
return comments
337+
338+
filtered = [c for c in comments if c.get("user") is not None and c.get("body")]
339+
skipped = len(comments) - len(filtered)
340+
if skipped:
341+
logger.info(
342+
f"Skipped {skipped} comment(s) with null user or empty body for PR #{pr_number}"
343+
)
344+
345+
logger.info(f"Extracted {len(filtered)} comments for PR #{pr_number}")
346+
return filtered
334347

335348

336349
def sleep_for_rate_limit(resp: requests.Response) -> None:
@@ -459,7 +472,7 @@ def transform_data(raw_data: list[dict], repo: str) -> dict:
459472
"target_repository": repo,
460473
"date_reviewed": review.get("submitted_at"),
461474
"reviewer_email": None, # TODO Placeholder for reviewer email extraction logic
462-
"reviewer_username": review.get("user", {}).get("login", "None"),
475+
"reviewer_username": (review.get("user") or {}).get("login"),
463476
"status": review.get("state"),
464477
}
465478
transformed_data["reviewers"].append(transformed_reviewer)

0 commit comments

Comments
 (0)