diff --git a/.github/workflows/update-pages.yml b/.github/workflows/update-pages.yml index 2c9c44b3e..062f15690 100644 --- a/.github/workflows/update-pages.yml +++ b/.github/workflows/update-pages.yml @@ -35,6 +35,16 @@ jobs: python -m pip install --upgrade pip setuptools wheel python -m pip install . + - name: Restore generated data cache + shell: bash + run: | + mkdir -p gh-pages + git fetch --depth=1 origin gh-pages + git archive origin/gh-pages github/commitActivity | tar -x -C gh-pages + if git cat-file -e origin/gh-pages:github/commitActivityHashes; then + git archive origin/gh-pages github/commitActivityHashes | tar -x -C gh-pages + fi + - name: Collect data env: DASHBOARD_AUR_REPOS: sunshine,sunshine-bin,sunshine-git diff --git a/src/updater.py b/src/updater.py index 3cbefedcc..c1b2884c0 100644 --- a/src/updater.py +++ b/src/updater.py @@ -2,8 +2,8 @@ import json import math import os -from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout -from datetime import datetime, timezone +from queue import Queue +from datetime import datetime, timedelta, timezone from threading import Thread # lib imports @@ -18,6 +18,11 @@ from src import helpers from src.logger import log +COMMIT_ACTIVITY_READY = 'ready' +COMMIT_ACTIVITY_PENDING = 'pending' +COMMIT_ACTIVITY_FAILED = 'failed' +GITHUB_REPO_STEP_TIMEOUT = 90 + def update_aur(aur_repos: list): """ @@ -193,30 +198,276 @@ def update_fb(): helpers.write_json_files(file_path=file_path, data=data) -def _get_stats_with_timeout(repo, timeout=60): +def _commit_participation_url(repo) -> str: + """ + Build the GitHub REST URL for a repository's weekly commit participation. + + Parameters + ---------- + repo : + PyGithub Repository object. + + Returns + ------- + str + GitHub REST API URL. + """ + return f'https://api.github.com/repos/{repo.owner.login}/{repo.name}/stats/participation' + + +def _commit_activity_cache_path(repo) -> str: + """ + Return the cache path for a repository's weekly commit activity. + + Parameters + ---------- + repo : + PyGithub Repository object. + + Returns + ------- + str + File path without the ``.json`` extension. + """ + return os.path.join(BASE_DIR, 'github', 'commitActivity', repo.name) + + +def _commit_activity_hash_cache_path(repo) -> str: + """ + Return the cache path for a repository's commit-activity source SHA. + + Parameters + ---------- + repo : + PyGithub Repository object. + + Returns + ------- + str + File path without the ``.json`` extension. + """ + return os.path.join(BASE_DIR, 'github', 'commitActivityHashes', repo.name) + + +def _has_cached_commit_activity(repo) -> bool: + """ + Return whether cached weekly commit activity exists for a repository. + + Parameters + ---------- + repo : + PyGithub Repository object. + + Returns + ------- + bool + True when a valid cached stats file exists. + """ + try: + with open(f'{_commit_activity_cache_path(repo)}.json') as f: + return isinstance(json.load(f), list) + except Exception: + return False + + +def _cached_commit_activity_sha(repo) -> str | None: + """ + Return the cached default-branch SHA for a repository's commit activity. + + Parameters + ---------- + repo : + PyGithub Repository object. + + Returns + ------- + str or None + Cached SHA when available. + """ + try: + with open(f'{_commit_activity_hash_cache_path(repo)}.json') as f: + data = json.load(f) + except Exception: + return None + + sha = data.get('sha') if isinstance(data, dict) else None + return sha if isinstance(sha, str) else None + + +def _default_branch_sha(repo) -> str: + """ + Return the current default-branch commit SHA for a repository. + + Parameters + ---------- + repo : + PyGithub Repository object. + + Returns + ------- + str + Default-branch commit SHA. + """ + return repo.get_branch(repo.default_branch).commit.sha + + +def _write_commit_activity(repo, commit_activity: list, sha: str | None = None) -> None: + """ + Write weekly commit activity for a repository. + + Parameters + ---------- + repo : + PyGithub Repository object. + commit_activity : list + Weekly commit activity records from GitHub's REST API. + sha : str or None + Default-branch commit SHA that produced the stats. + """ + helpers.write_json_files(file_path=_commit_activity_cache_path(repo), data=commit_activity) + if sha: + helpers.write_json_files(file_path=_commit_activity_hash_cache_path(repo), data={'sha': sha}) + + +def _run_github_repo_step(repo, step: str, func: callable, default=None, timeout: int = GITHUB_REPO_STEP_TIMEOUT): """ - Fetch commit activity for a repo, capping total wait time. + Run an optional per-repository GitHub step with a total timeout. Parameters ---------- repo : PyGithub Repository object. + step : str + Human-readable step name for logs. + func : callable + Function to run. + default : + Value returned when the step errors or times out. timeout : int - Maximum seconds to wait before giving up (GitHub may return 202 while - computing stats, causing PyGithub to retry indefinitely without this guard). + Maximum seconds to wait for the step. Returns ------- - list or None - Weekly commit-activity objects, or None on timeout. + any + The callable result, or ``default`` when the step fails. """ - with ThreadPoolExecutor(max_workers=1) as pool: - future = pool.submit(repo.get_stats_commit_activity) + result_queue = Queue(maxsize=1) + + def runner(): try: - return future.result(timeout=timeout) - except FuturesTimeout: - log.warning(f'Timeout fetching commit activity for {repo.name}, skipping.') - return None + result_queue.put((True, func())) + except Exception as e: + result_queue.put((False, e)) + + thread = Thread(target=runner, daemon=True) + thread.start() + thread.join(timeout=timeout) + + if thread.is_alive(): + log.warning(f'Timeout after {timeout}s while running GitHub {step} for {repo.name}, skipping.') + return default + + success, value = result_queue.get() + if success: + return value + + log.warning(f'Error running GitHub {step} for {repo.name}: {value}') + return default + + +def _participation_to_commit_activity(participation: dict) -> list[dict]: + """ + Convert GitHub participation stats into commit-activity-shaped records. + + Parameters + ---------- + participation : dict + Response body from ``/stats/participation``. + + Returns + ------- + list + Weekly commit activity records with ``week`` and ``total`` keys. + """ + totals = participation.get('all', []) + if not isinstance(totals, list): + return [] + + today = datetime.now(tz=timezone.utc).date() + days_since_sunday = (today.weekday() + 1) % 7 + newest_week = today - timedelta(days=days_since_sunday) + + return [ + { + 'days': [0, 0, 0, 0, 0, 0, 0], + 'total': total, + 'week': int( + datetime.combine( + newest_week - timedelta(weeks=len(totals) - index - 1), + datetime.min.time(), + tzinfo=timezone.utc, + ).timestamp() + ), + } + for index, total in enumerate(totals) + if isinstance(total, int) + ] + + +def _fetch_commit_activity(repo, headers: dict, sha: str | None = None) -> str: + """ + Fetch weekly total commit counts for a repository. + + GitHub's ``/stats/commit_activity`` endpoint can return ``202`` for a long + time in CI. The dashboard only charts weekly totals, so use + ``/stats/participation`` and keep writing the existing ``commitActivity`` + cache files for builder compatibility. + + Parameters + ---------- + repo : + PyGithub Repository object. + headers : dict + HTTP headers including the GitHub authorisation token. + sha : str or None + Default-branch commit SHA that produced the stats. + + Returns + ------- + str + One of ``COMMIT_ACTIVITY_READY``, ``COMMIT_ACTIVITY_PENDING``, or + ``COMMIT_ACTIVITY_FAILED``. + """ + # Use participation instead of commit_activity because the dashboard only + # needs weekly totals, and commit_activity can remain at 202 in CI. + url = _commit_participation_url(repo) + try: + response = helpers.s.get(url=url, headers=headers) + except requests.exceptions.RequestException as e: + log.warning(f'Error fetching commit activity for {repo.name}: {e}') + return COMMIT_ACTIVITY_FAILED + + if response.status_code == 202: + return COMMIT_ACTIVITY_PENDING + + try: + data = response.json() + except Exception as e: + log.warning(f'Error parsing commit activity for {repo.name}: {e}') + return COMMIT_ACTIVITY_FAILED + + if response.status_code != 200: + message = data.get('message', response.text) if isinstance(data, dict) else response.text + log.warning(f'Error fetching commit activity for {repo.name}: {message}') + return COMMIT_ACTIVITY_FAILED + + commit_activity = _participation_to_commit_activity(data) + if not commit_activity: + log.warning(f'Unexpected commit activity response for {repo.name}: {data}') + return COMMIT_ACTIVITY_FAILED + + _write_commit_activity(repo, commit_activity, sha) + return COMMIT_ACTIVITY_READY def _seed_star_history(repo, total: int, initial_samples: int) -> list[dict]: @@ -428,32 +679,20 @@ def _build_code_scanning_history(alerts: list) -> list[dict]: ] -def _process_github_repo(repo, headers: dict, graphql_url: str) -> None: +def _collect_open_pulls(repo) -> list[dict]: """ - Collect and cache all per-repository data for a single GitHub repo. + Fetch open pull request summary data for a repository. Parameters ---------- repo : PyGithub Repository object. - headers : dict - HTTP headers including the GitHub authorisation token. - graphql_url : str - GitHub GraphQL endpoint URL. - """ - # languages - languages = repo.get_languages() - file_path = os.path.join(BASE_DIR, 'github', 'languages', repo.name) - helpers.write_json_files(file_path=file_path, data=languages) - # commit activity (last year, weekly buckets) - commit_activity = _get_stats_with_timeout(repo) - if commit_activity: - commits = [week.raw_data for week in commit_activity] - file_path = os.path.join(BASE_DIR, 'github', 'commitActivity', repo.name) - helpers.write_json_files(file_path=file_path, data=commits) - - # open pull requests + Returns + ------- + list + Pull request summary dictionaries. + """ pulls_data = [] for pr in repo.get_pulls(state='open'): pulls_data.append({ @@ -467,32 +706,27 @@ def _process_github_repo(repo, headers: dict, graphql_url: str) -> None: 'draft': pr.draft, 'milestone': pr.milestone.title if pr.milestone else None, }) - file_path = os.path.join(BASE_DIR, 'github', 'pulls', repo.name) - helpers.write_json_files(file_path=file_path, data=pulls_data) + return pulls_data - # open code scanning alerts and per-day history - alerts = _fetch_code_scanning_alerts(repo) - open_alert_count = sum( - 1 for a in alerts if getattr(a, 'state', None) == 'open' - ) - file_path = os.path.join(BASE_DIR, 'github', 'codeScanning', repo.name) - helpers.write_json_files(file_path=file_path, data={ - 'repo': repo.name, - 'open': open_alert_count, - 'updated_at': datetime.now(tz=timezone.utc).isoformat(), - }) - code_scanning_history = _build_code_scanning_history(alerts) - file_path = os.path.join(BASE_DIR, 'github', 'codeScanningHistory', repo.name) - helpers.write_json_files(file_path=file_path, data=code_scanning_history) +def _fetch_open_graph_image_url(repo, headers: dict, graphql_url: str) -> str: + """ + Fetch a repository's OpenGraph image URL from GitHub GraphQL. - # star history (sampled to cap API calls) - star_history = _collect_star_history(repo) - if star_history: - file_path = os.path.join(BASE_DIR, 'github', 'starHistory', repo.name) - helpers.write_json_files(file_path=file_path, data=star_history) + Parameters + ---------- + repo : + PyGithub Repository object. + headers : dict + HTTP headers including the GitHub authorisation token. + graphql_url : str + GitHub GraphQL endpoint URL. - # openGraphImages - uses GraphQL + Returns + ------- + str + OpenGraph image URL. + """ query = """ { repository(owner: "%s", name: "%s") { @@ -504,18 +738,127 @@ def _process_github_repo(repo, headers: dict, graphql_url: str) -> None: response = helpers.s.post(url=graphql_url, json={'query': query}, headers=headers) repo_data = response.json() try: - image_url = repo_data['data']['repository']['openGraphImageUrl'] + return repo_data['data']['repository']['openGraphImageUrl'] except KeyError: - log.error(f'Error: update_github: {repo_data}') - raise SystemExit('"GITHUB_TOKEN" is invalid.') - if 'avatars' not in image_url: + raise RuntimeError(f'Error: update_github: {repo_data}') from None + + +def _collect_commit_activity(repos: list, headers: dict) -> None: + """ + Collect weekly commit totals for active repositories. + + GitHub caches repository stats by the current default-branch SHA. Reuse + cached files while the SHA matches, and refresh only when the SHA changes + or when no cached stats file exists. The first pass gives GitHub a chance + to calculate participation stats for every changed repository; the second + pass revisits only repositories that returned ``202`` during the first + request. + + Parameters + ---------- + repos : list + Active PyGithub Repository objects. + headers : dict + HTTP headers including the GitHub authorisation token. + """ + pending_repos = [] + + for repo in tqdm( + iterable=repos, + desc='Priming GitHub commit activity', + ): + sha = _run_github_repo_step(repo, 'default branch SHA', lambda repo=repo: _default_branch_sha(repo)) + if sha and _has_cached_commit_activity(repo) and _cached_commit_activity_sha(repo) == sha: + continue + + status = _fetch_commit_activity(repo, headers, sha) + if status == COMMIT_ACTIVITY_PENDING: + pending_repos.append((repo, sha)) + + if not pending_repos: + return + + still_pending = [] + for repo, sha in tqdm( + iterable=pending_repos, + desc='Collecting GitHub commit activity', + ): + status = _fetch_commit_activity(repo, headers, sha) + if status == COMMIT_ACTIVITY_PENDING: + still_pending.append(repo.name) + + if still_pending: + repo_names = ', '.join(still_pending) + log.warning(f'GitHub commit activity is still being calculated for: {repo_names}') + + +def _process_github_repo(repo, headers: dict, graphql_url: str) -> None: + """ + Collect and cache all per-repository data for a single GitHub repo. + + Parameters + ---------- + repo : + PyGithub Repository object. + headers : dict + HTTP headers including the GitHub authorisation token. + graphql_url : str + GitHub GraphQL endpoint URL. + """ + # languages + languages = _run_github_repo_step(repo, 'languages', repo.get_languages) + if languages is not None: + file_path = os.path.join(BASE_DIR, 'github', 'languages', repo.name) + helpers.write_json_files(file_path=file_path, data=languages) + + # open pull requests + pulls_data = _run_github_repo_step(repo, 'pull requests', lambda: _collect_open_pulls(repo)) + if pulls_data is not None: + file_path = os.path.join(BASE_DIR, 'github', 'pulls', repo.name) + helpers.write_json_files(file_path=file_path, data=pulls_data) + + # open code scanning alerts and per-day history + alerts = _run_github_repo_step(repo, 'code scanning alerts', lambda: _fetch_code_scanning_alerts(repo)) + if alerts is not None: + open_alert_count = sum( + 1 for a in alerts if getattr(a, 'state', None) == 'open' + ) + file_path = os.path.join(BASE_DIR, 'github', 'codeScanning', repo.name) + helpers.write_json_files(file_path=file_path, data={ + 'repo': repo.name, + 'open': open_alert_count, + 'updated_at': datetime.now(tz=timezone.utc).isoformat(), + }) + + code_scanning_history = _build_code_scanning_history(alerts) + file_path = os.path.join(BASE_DIR, 'github', 'codeScanningHistory', repo.name) + helpers.write_json_files(file_path=file_path, data=code_scanning_history) + + # star history (sampled to cap API calls) + star_history = _run_github_repo_step(repo, 'star history', lambda: _collect_star_history(repo)) + if star_history: + file_path = os.path.join(BASE_DIR, 'github', 'starHistory', repo.name) + helpers.write_json_files(file_path=file_path, data=star_history) + + # openGraphImages - uses GraphQL + image_url = _run_github_repo_step( + repo, + 'OpenGraph image URL', + lambda: _fetch_open_graph_image_url(repo, headers, graphql_url), + ) + if image_url and 'avatars' not in image_url: file_path = os.path.join(BASE_DIR, 'github', 'openGraphImages', repo.name) - helpers.save_image_from_url( - file_path=file_path, - file_extension='png', - image_url=image_url, - size_x=624, - size_y=312, + _run_github_repo_step( + repo, + 'OpenGraph image download', + lambda: helpers.save_image_from_url( + file_path=file_path, + file_extension='png', + image_url=image_url, + size_x=624, + size_y=312, + ), + timeout=30, ) @@ -542,16 +885,19 @@ def update_github(): # GraphQL query still uses direct requests headers = { + 'Accept': 'application/vnd.github+json', 'Authorization': f'token {os.environ["GITHUB_TOKEN"]}', + 'X-GitHub-Api-Version': '2022-11-28', } graphql_url = 'https://api.github.com/graphql' + active_repos = [repo for repo in repos if not repo.archived] + _collect_commit_activity(active_repos, headers) + for repo in tqdm( - iterable=repos, + iterable=active_repos, desc='Updating GitHub data', ): - if repo.archived: - continue _process_github_repo(repo, headers, graphql_url) diff --git a/tests/unit/test_updater.py b/tests/unit/test_updater.py index 42759f87a..25c2453e6 100644 --- a/tests/unit/test_updater.py +++ b/tests/unit/test_updater.py @@ -1,6 +1,6 @@ # standard imports import json -from concurrent.futures import TimeoutError as FuturesTimeout +import time from datetime import datetime, timezone from types import SimpleNamespace @@ -26,11 +26,6 @@ def json(self): return self._payload -class FakeWeek: - def __init__(self, week, total): - self.raw_data = {'week': week, 'total': total} - - class FakePull: def __init__(self, number=1): self.number = number @@ -61,18 +56,21 @@ def get_page(self, idx): class FakeRepo: - def __init__(self, name='repo1', archived=False, stars=4): + def __init__(self, name='repo1', archived=False, stars=4, sha=None): self.name = name self.archived = archived self.owner = SimpleNamespace(login='owner') self.stargazers_count = stars + self.default_branch = 'master' + self.sha = sha or f'sha-{name}' self.raw_data = {'name': name, 'archived': archived} def get_languages(self): return {'Python': 100} - def get_stats_commit_activity(self): - return [FakeWeek(1, 1)] + def get_branch(self, branch): + assert branch == self.default_branch + return SimpleNamespace(commit=SimpleNamespace(sha=self.sha)) def get_pulls(self, state='open'): assert state == 'open' @@ -229,37 +227,191 @@ def fake_get(url): assert 'paging' not in writes[0][1] -def test_get_stats_with_timeout_success_and_timeout(monkeypatch): - class FutureOk: - def result(self, timeout): - return [1] +def test_fetch_commit_activity(monkeypatch, tmp_path): + monkeypatch.setattr(updater, 'BASE_DIR', str(tmp_path / 'gh-pages')) - class FutureTimeout: - def result(self, timeout): - raise FuturesTimeout() + fixed_today = datetime(2026, 5, 19, tzinfo=timezone.utc) - class Pool: - def __init__(self, future): - self.future = future + class FixedDatetime(datetime): + @classmethod + def now(cls, tz=None): + return fixed_today - def __enter__(self): - return self + monkeypatch.setattr(updater, 'datetime', FixedDatetime) - def __exit__(self, *args): - return False + writes = [] + monkeypatch.setattr(updater.helpers, 'write_json_files', lambda file_path, data: writes.append((file_path, data))) - def submit(self, func): - return self.future + urls = [] + monkeypatch.setattr( + updater.helpers.s, + 'get', + lambda url, headers: urls.append(url) or FakeResponse({'all': [0, 2]}, status=200), + ) + + repo = FakeRepo(name='demo') + headers = {'Authorization': 'token'} - monkeypatch.setattr(updater, 'ThreadPoolExecutor', lambda max_workers: Pool(FutureOk())) - repo = SimpleNamespace(name='x', get_stats_commit_activity=lambda: [1]) - assert updater._get_stats_with_timeout(repo) == [1] + assert updater._fetch_commit_activity(repo, headers, sha='abc') == updater.COMMIT_ACTIVITY_READY + assert urls == ['https://api.github.com/repos/owner/demo/stats/participation'] + assert len(writes) == 2 + assert writes[0][0].endswith(('commitActivity\\demo', 'commitActivity/demo')) + assert writes[0][1] == [ + {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 0, 'week': 1778371200}, + {'days': [0, 0, 0, 0, 0, 0, 0], 'total': 2, 'week': 1778976000}, + ] + assert writes[1][0].endswith(('commitActivityHashes\\demo', 'commitActivityHashes/demo')) + assert writes[1][1] == {'sha': 'abc'} + +def test_fetch_commit_activity_errors(monkeypatch): + repo = FakeRepo(name='demo') + headers = {'Authorization': 'token'} warnings = [] monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg)) - monkeypatch.setattr(updater, 'ThreadPoolExecutor', lambda max_workers: Pool(FutureTimeout())) - assert updater._get_stats_with_timeout(repo) is None - assert warnings + + assert updater._participation_to_commit_activity({'all': 'bad'}) == [] + + def raise_timeout(url, headers): + raise requests.exceptions.Timeout('timeout') + + monkeypatch.setattr(updater.helpers.s, 'get', raise_timeout) + assert updater._fetch_commit_activity(repo, headers) == updater.COMMIT_ACTIVITY_FAILED + + monkeypatch.setattr(updater.helpers.s, 'get', lambda url, headers: FakeResponse(status=202)) + assert updater._fetch_commit_activity(repo, headers) == updater.COMMIT_ACTIVITY_PENDING + + monkeypatch.setattr( + updater.helpers.s, + 'get', + lambda url, headers: FakeResponse(status=500, raises=ValueError('bad')), + ) + assert updater._fetch_commit_activity(repo, headers) == updater.COMMIT_ACTIVITY_FAILED + + monkeypatch.setattr( + updater.helpers.s, + 'get', + lambda url, headers: FakeResponse({'message': 'rate limit'}, status=403), + ) + assert updater._fetch_commit_activity(repo, headers) == updater.COMMIT_ACTIVITY_FAILED + + monkeypatch.setattr(updater.helpers.s, 'get', lambda url, headers: FakeResponse({'all': []}, status=200)) + assert updater._fetch_commit_activity(repo, headers) == updater.COMMIT_ACTIVITY_FAILED + assert len(warnings) == 4 + + +def test_run_github_repo_step_error(monkeypatch): + repo = FakeRepo(name='demo') + warnings = [] + monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg)) + + def raise_error(): + raise RuntimeError('boom') + + result = updater._run_github_repo_step(repo, 'broken step', raise_error, default='fallback') + + assert updater._run_github_repo_step(repo, 'normal step', lambda: 'ok') == 'ok' + assert result == 'fallback' + assert warnings == ['Error running GitHub broken step for demo: boom'] + + +def test_run_github_repo_step_timeout(monkeypatch): + repo = FakeRepo(name='demo') + warnings = [] + monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg)) + + result = updater._run_github_repo_step( + repo, + 'slow step', + lambda: time.sleep(0.05), + default='fallback', + timeout=0.001, + ) + + assert result == 'fallback' + assert warnings == ['Timeout after 0.001s while running GitHub slow step for demo, skipping.'] + + +def test_commit_activity_cache_helpers(tmp_path, monkeypatch): + monkeypatch.setattr(updater, 'BASE_DIR', str(tmp_path / 'gh-pages')) + repo = FakeRepo(name='demo') + + assert not updater._has_cached_commit_activity(repo) + assert updater._cached_commit_activity_sha(repo) is None + + stats_path = tmp_path / 'gh-pages' / 'github' / 'commitActivity' / 'demo.json' + hash_path = tmp_path / 'gh-pages' / 'github' / 'commitActivityHashes' / 'demo.json' + stats_path.parent.mkdir(parents=True) + hash_path.parent.mkdir(parents=True) + + stats_path.write_text('{bad', encoding='utf-8') + hash_path.write_text('[]', encoding='utf-8') + assert not updater._has_cached_commit_activity(repo) + assert updater._cached_commit_activity_sha(repo) is None + + stats_path.write_text('[{"total": 1}]', encoding='utf-8') + hash_path.write_text('{"sha": "abc"}', encoding='utf-8') + assert updater._has_cached_commit_activity(repo) + assert updater._cached_commit_activity_sha(repo) == 'abc' + + +def test_collect_commit_activity_uses_sha_cache(monkeypatch, tmp_path): + monkeypatch.setattr(updater, 'BASE_DIR', str(tmp_path / 'gh-pages')) + + cached = FakeRepo('cached', sha='same') + changed = FakeRepo('changed', sha='new') + missing = FakeRepo('missing', sha='missing') + stuck = FakeRepo('stuck', sha='stuck') + + updater._write_commit_activity(cached, [{'total': 1}], 'same') + updater._write_commit_activity(changed, [{'total': 1}], 'old') + + calls = [] + warnings = [] + statuses = { + 'changed': [updater.COMMIT_ACTIVITY_READY], + 'missing': [updater.COMMIT_ACTIVITY_PENDING, updater.COMMIT_ACTIVITY_READY], + 'stuck': [updater.COMMIT_ACTIVITY_PENDING, updater.COMMIT_ACTIVITY_PENDING], + } + + def fake_fetch(repo, headers, sha=None): + calls.append((repo.name, sha)) + return statuses[repo.name].pop(0) + + monkeypatch.setattr(updater, '_fetch_commit_activity', fake_fetch) + monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg)) + + updater._collect_commit_activity([cached, changed, missing, stuck], {}) + + expected_warning = 'GitHub commit activity is still being calculated for: stuck' + assert calls == [ + ('changed', 'new'), + ('missing', 'missing'), + ('stuck', 'stuck'), + ('missing', 'missing'), + ('stuck', 'stuck'), + ] + assert warnings == [expected_warning] + + +def test_collect_commit_activity_returns_when_all_ready(monkeypatch, tmp_path): + monkeypatch.setattr(updater, 'BASE_DIR', str(tmp_path / 'gh-pages')) + + repo = FakeRepo('ready', sha='new') + calls = [] + warnings = [] + + def fake_fetch(repo, headers, sha=None): + calls.append((repo.name, sha)) + return updater.COMMIT_ACTIVITY_READY + + monkeypatch.setattr(updater, '_fetch_commit_activity', fake_fetch) + monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg)) + + updater._collect_commit_activity([repo], {}) + + assert calls == [('ready', 'new')] + assert warnings == [] def test_seed_star_history(monkeypatch): @@ -331,7 +483,6 @@ def test_process_github_repo(monkeypatch, tmp_path): 'save_image_from_url', lambda **kwargs: writes.append(('img', kwargs['file_path'])) ) - monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo: [FakeWeek(1, 1)]) monkeypatch.setattr(updater, '_collect_star_history', lambda repo: [{'date': '2026-01-01', 'stars': 1}]) monkeypatch.setattr(updater, '_fetch_code_scanning_alerts', lambda repo: []) monkeypatch.setattr( @@ -357,10 +508,11 @@ def post_ok(url, json, headers): def test_process_github_repo_error_and_avatar_skip(monkeypatch, tmp_path): monkeypatch.setattr(updater, 'BASE_DIR', str(tmp_path / 'gh-pages')) monkeypatch.setattr(updater.helpers, 'write_json_files', lambda **kwargs: None) - monkeypatch.setattr(updater, '_get_stats_with_timeout', lambda repo: None) monkeypatch.setattr(updater, '_collect_star_history', lambda repo: []) monkeypatch.setattr(updater, '_fetch_code_scanning_alerts', lambda repo: []) monkeypatch.setattr(updater, '_build_code_scanning_history', lambda alerts: []) + warnings = [] + monkeypatch.setattr(updater.log, 'warning', lambda msg: warnings.append(msg)) monkeypatch.setattr( updater.helpers.s, @@ -378,8 +530,8 @@ def test_process_github_repo_error_and_avatar_skip(monkeypatch, tmp_path): updater._process_github_repo(FakeRepo(name='demo'), {'Authorization': 'x'}, 'https://api.github.com/graphql') monkeypatch.setattr(updater.helpers.s, 'post', lambda url, json, headers: FakeResponse({'bad': 1})) - with pytest.raises(SystemExit): - updater._process_github_repo(FakeRepo(name='demo'), {'Authorization': 'x'}, 'https://api.github.com/graphql') + updater._process_github_repo(FakeRepo(name='demo'), {'Authorization': 'x'}, 'https://api.github.com/graphql') + assert any('OpenGraph image URL' in warning for warning in warnings) def test_update_github(monkeypatch): @@ -387,9 +539,10 @@ def test_update_github(monkeypatch): monkeypatch.setenv('GITHUB_REPOSITORY_OWNER', 'owner') repo_active = FakeRepo('active', archived=False) + repo_pending = FakeRepo('pending', archived=False) repo_archived = FakeRepo('archived', archived=True) - owner = SimpleNamespace(get_repos=lambda: [repo_active, repo_archived]) + owner = SimpleNamespace(get_repos=lambda: [repo_active, repo_pending, repo_archived]) class FakeGithub: def __init__(self, auth, timeout): @@ -403,6 +556,12 @@ def get_user(self, name): writes = [] monkeypatch.setattr(updater.helpers, 'write_json_files', lambda file_path, data: writes.append((file_path, data))) + commit_repos = [] + monkeypatch.setattr( + updater, + '_collect_commit_activity', + lambda repos, headers: commit_repos.extend(repo.name for repo in repos), + ) processed = [] monkeypatch.setattr(updater, '_process_github_repo', lambda repo, headers, graphql_url: processed.append(repo.name)) monkeypatch.setattr(updater, 'BASE_DIR', 'base') @@ -410,7 +569,8 @@ def get_user(self, name): updater.update_github() assert any(path.endswith('github\\repos') or path.endswith('github/repos') for path, _ in writes) - assert processed == ['active'] + assert commit_repos == ['active', 'pending'] + assert processed == ['active', 'pending'] def test_update_patreon(monkeypatch):