From a0463264b08051297a582f2afbc96d74b2dc3f1b Mon Sep 17 00:00:00 2001 From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Sun, 17 May 2026 08:28:58 -0400 Subject: [PATCH] fix: Use HTTP requests for GitHub commit stats Replace the ThreadPoolExecutor+PyGithub approach for fetching /stats/commit_activity with direct HTTP requests via helpers.s.get. Add headers parameter, retry logic for 202 responses, handling for 204, and a retry_after option; use monotonic-based deadline and per-request timeouts. Update callers to pass headers and write raw JSON commit activity. Tests updated to mock HTTP responses, add FakeResponse.raise_for_status, and cover 202->success, 204, and timeout behavior. --- src/updater.py | 42 ++++++++++++++++++++--------- tests/unit/test_updater.py | 55 +++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 39 deletions(-) diff --git a/src/updater.py b/src/updater.py index 3cbefedcc..8163463f7 100644 --- a/src/updater.py +++ b/src/updater.py @@ -2,7 +2,7 @@ import json import math import os -from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout +import time from datetime import datetime, timezone from threading import Thread @@ -193,7 +193,7 @@ def update_fb(): helpers.write_json_files(file_path=file_path, data=data) -def _get_stats_with_timeout(repo, timeout=60): +def _get_stats_with_timeout(repo, headers: dict, timeout=60, retry_after=2): """ Fetch commit activity for a repo, capping total wait time. @@ -201,23 +201,42 @@ def _get_stats_with_timeout(repo, timeout=60): ---------- repo : PyGithub Repository object. + headers : dict + HTTP headers including the GitHub authorisation token. timeout : int Maximum seconds to wait before giving up (GitHub may return 202 while - computing stats, causing PyGithub to retry indefinitely without this guard). + computing stats). + retry_after : int + Seconds to wait between 202 responses. Returns ------- list or None - Weekly commit-activity objects, or None on timeout. + Weekly commit-activity data, or None on timeout. """ - with ThreadPoolExecutor(max_workers=1) as pool: - future = pool.submit(repo.get_stats_commit_activity) - try: - return future.result(timeout=timeout) - except FuturesTimeout: + url = f'https://api.github.com/repos/{repo.owner.login}/{repo.name}/stats/commit_activity' + deadline = time.monotonic() + timeout + + while True: + remaining = deadline - time.monotonic() + if remaining <= 0: log.warning(f'Timeout fetching commit activity for {repo.name}, skipping.') return None + response = helpers.s.get( + url=url, + headers=headers, + timeout=min(helpers.DEFAULT_TIMEOUT, remaining), + ) + if response.status_code == 202: + time.sleep(min(retry_after, max(0, deadline - time.monotonic()))) + continue + if response.status_code == 204: + return None + + response.raise_for_status() + return response.json() + def _seed_star_history(repo, total: int, initial_samples: int) -> list[dict]: """ @@ -447,11 +466,10 @@ def _process_github_repo(repo, headers: dict, graphql_url: str) -> None: helpers.write_json_files(file_path=file_path, data=languages) # commit activity (last year, weekly buckets) - commit_activity = _get_stats_with_timeout(repo) + commit_activity = _get_stats_with_timeout(repo, headers) if commit_activity: - commits = [week.raw_data for week in commit_activity] file_path = os.path.join(BASE_DIR, 'github', 'commitActivity', repo.name) - helpers.write_json_files(file_path=file_path, data=commits) + helpers.write_json_files(file_path=file_path, data=commit_activity) # open pull requests pulls_data = [] diff --git a/tests/unit/test_updater.py b/tests/unit/test_updater.py index 42759f87a..23cf20e43 100644 --- a/tests/unit/test_updater.py +++ b/tests/unit/test_updater.py @@ -1,6 +1,5 @@ # standard imports import json -from concurrent.futures import TimeoutError as FuturesTimeout from datetime import datetime, timezone from types import SimpleNamespace @@ -25,6 +24,10 @@ def json(self): raise self._raises return self._payload + def raise_for_status(self): + if self.status_code >= 400: + raise requests.exceptions.HTTPError(self.text) + class FakeWeek: def __init__(self, week, total): @@ -229,38 +232,36 @@ def fake_get(url): assert 'paging' not in writes[0][1] -def test_get_stats_with_timeout_success_and_timeout(monkeypatch): - class FutureOk: - def result(self, timeout): - return [1] - - class FutureTimeout: - def result(self, timeout): - raise FuturesTimeout() - - class Pool: - def __init__(self, future): - self.future = future - - def __enter__(self): - return self +def test_get_stats_with_timeout_success_retry_and_timeout(monkeypatch): + repo = SimpleNamespace(name='x', owner=SimpleNamespace(login='owner')) + headers = {'Authorization': 'token'} + calls = [] - def __exit__(self, *args): - return False + def fake_get(url, headers, timeout): + calls.append((url, headers, timeout)) + if len(calls) == 1: + return FakeResponse(status=202) + return FakeResponse([{'week': 1, 'total': 2}]) - def submit(self, func): - return self.future + sleeps = [] + monkeypatch.setattr(updater.helpers.s, 'get', fake_get) + monkeypatch.setattr(updater.time, 'sleep', lambda seconds: sleeps.append(seconds)) + monkeypatch.setattr(updater.time, 'monotonic', lambda: 0) - monkeypatch.setattr(updater, 'ThreadPoolExecutor', lambda max_workers: Pool(FutureOk())) - repo = SimpleNamespace(name='x', get_stats_commit_activity=lambda: [1]) - assert updater._get_stats_with_timeout(repo) == [1] + assert updater._get_stats_with_timeout(repo, headers) == [{'week': 1, 'total': 2}] + assert calls[0][0] == 'https://api.github.com/repos/owner/x/stats/commit_activity' + assert calls[0][1] == headers + assert calls[0][2] == updater.helpers.DEFAULT_TIMEOUT + assert sleeps == [2] warnings = [] monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg)) - monkeypatch.setattr(updater, 'ThreadPoolExecutor', lambda max_workers: Pool(FutureTimeout())) - assert updater._get_stats_with_timeout(repo) is None + assert updater._get_stats_with_timeout(repo, headers, timeout=0) is None assert warnings + monkeypatch.setattr(updater.helpers.s, 'get', lambda url, headers, timeout: FakeResponse(status=204)) + assert updater._get_stats_with_timeout(repo, headers) is None + def test_seed_star_history(monkeypatch): repo = FakeRepo(stars=250) @@ -331,7 +332,7 @@ def test_process_github_repo(monkeypatch, tmp_path): 'save_image_from_url', lambda **kwargs: writes.append(('img', kwargs['file_path'])) ) - monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo: [FakeWeek(1, 1)]) + monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo, headers: [{'week': 1, 'total': 1}]) monkeypatch.setattr(updater, '_collect_star_history', lambda repo: [{'date': '2026-01-01', 'stars': 1}]) monkeypatch.setattr(updater, '_fetch_code_scanning_alerts', lambda repo: []) monkeypatch.setattr( @@ -357,7 +358,7 @@ def post_ok(url, json, headers): def test_process_github_repo_error_and_avatar_skip(monkeypatch, tmp_path): monkeypatch.setattr(updater, 'BASE_DIR', str(tmp_path / 'gh-pages')) monkeypatch.setattr(updater.helpers, 'write_json_files', lambda **kwargs: None) - monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo: None) + monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo, headers: None) monkeypatch.setattr(updater, '_collect_star_history', lambda repo: []) monkeypatch.setattr(updater, '_fetch_code_scanning_alerts', lambda repo: []) monkeypatch.setattr(updater, '_build_code_scanning_history', lambda alerts: [])