@@ -301,8 +301,13 @@ def extract_reviewers(
301301
302302 reviewers = github_get (session , reviewers_url ).json ()
303303
304- logger .info (f"Extracted { len (reviewers )} reviewers for PR #{ pr_number } " )
305- return reviewers
304+ filtered = [r for r in reviewers if r .get ("user" ) is not None ]
305+ skipped = len (reviewers ) - len (filtered )
306+ if skipped :
307+ logger .info (f"Skipped { skipped } reviewer(s) with null user for PR #{ pr_number } " )
308+
309+ logger .info (f"Extracted { len (filtered )} reviewers for PR #{ pr_number } " )
310+ return filtered
306311
307312
308313def extract_comments (
@@ -329,8 +334,16 @@ def extract_comments(
329334 logger .info (f"Comments URL: { comments_url } " )
330335
331336 comments = github_get (session , comments_url ).json ()
332- logger .info (f"Extracted { len (comments )} comments for PR #{ pr_number } " )
333- return comments
337+
338+ filtered = [c for c in comments if c .get ("user" ) is not None and c .get ("body" )]
339+ skipped = len (comments ) - len (filtered )
340+ if skipped :
341+ logger .info (
342+ f"Skipped { skipped } comment(s) with null user or empty body for PR #{ pr_number } "
343+ )
344+
345+ logger .info (f"Extracted { len (filtered )} comments for PR #{ pr_number } " )
346+ return filtered
334347
335348
336349def sleep_for_rate_limit (resp : requests .Response ) -> None :
@@ -459,7 +472,7 @@ def transform_data(raw_data: list[dict], repo: str) -> dict:
459472 "target_repository" : repo ,
460473 "date_reviewed" : review .get ("submitted_at" ),
461474 "reviewer_email" : None , # TODO Placeholder for reviewer email extraction logic
462- "reviewer_username" : review .get ("user" , {}).get ("login" , "None " ),
475+ "reviewer_username" : ( review .get ("user" ) or {}).get ("login" ),
463476 "status" : review .get ("state" ),
464477 }
465478 transformed_data ["reviewers" ].append (transformed_reviewer )
0 commit comments