From ada05a571d3328cd13c9bc3107b4c7279d2e5b96 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 21 May 2026 14:21:11 -0400 Subject: [PATCH] Skip comments that return a 410 Gone error from GitHub as well. Signed-off-by: Adrian Edwards --- collectoss/tasks/github/messages.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/collectoss/tasks/github/messages.py b/collectoss/tasks/github/messages.py index 342eeb2ca..8904c72b2 100644 --- a/collectoss/tasks/github/messages.py +++ b/collectoss/tasks/github/messages.py @@ -4,7 +4,7 @@ from collectoss.tasks.init.celery_app import celery_app as celery from collectoss.tasks.init.celery_app import CoreRepoCollectionTask from collectoss.application.db.data_parse import * -from collectoss.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException +from collectoss.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException, ResourceGoneException from collectoss.tasks.github.util.github_task_session import GithubTaskManifest from collectoss.tasks.util.worker_util import remove_duplicate_dicts from collectoss.tasks.github.util.util import get_owner_repo @@ -127,6 +127,9 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger except UrlNotFoundException: logger.info(f"{task_name}: PR or issue comment url of {comment_url} returned 404. Skipping.") skipped_urls += 1 + except ResourceGoneException: + logger.info(f"{task_name}: PR or issue comment url of {comment_url} returned 410. Skipping.") + skipped_urls += 1 if len(all_data) >= message_batch_size: process_messages(all_data, task_name, repo_id, logger, db_session)