From 2c383479ba8b0fec43e747199091a1b35425216d Mon Sep 17 00:00:00 2001
From: Hans Johnson
Date: Thu, 23 Apr 2026 17:07:12 +0000
Subject: [PATCH] ENH: Automatic GitHub Actions cache cleanup for closed/merged PRs

Add two complementary workflows that free the repository's 10 GB
Actions-cache budget without adding overhead to regular CI:

- cleanup-pr-caches.yml             event-driven, fires on PR close
- cleanup-stale-caches-nightly.yml  scheduled sweep, 3-day grace

The event-driven workflow deletes every cache scoped to the closed PR's
merge and head refs within seconds of close, using the minimal
permission set (actions: write, contents: read). The nightly sweep is
the safety net: it catches caches orphaned during a cleanup-workflow
outage, or those pre-dating this workflow.

Motivation: ITK regularly hits the 10 GB per-repo cache cap because
ccache entries (2-3 GB per platform, 3 platforms) accumulate across
open and closed PRs. Once the cap is reached, GitHub silently rejects
all subsequent cache saves with "Cache reservation failed: you have
reached your configured budget, your cache is now read only". This
manifested on PR #6109's Pixi-Cxx + ARMBUILD runs where both the ccache
and the new externaldata-* saves failed, even though the workflows are
correctly wired.

Pattern follows the GitHub documentation for force-deleting cache entries:
https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries

Robustness properties of the on-close workflow:
- ref-scoped delete (refs/pull/N/merge and refs/pull/N/head) cannot
  touch refs/heads/main or other PR refs
- idempotent: re-running finds 0 caches and exits 0
- works for PRs from forks (runs in upstream context with fork's
  PR number and upstream's GITHUB_TOKEN)
- closed state is terminal: a reopened PR gets fresh cache entries
  tied to new commits; deletions target the previous-closure era
---
 .github/workflows/cleanup-pr-caches.yml       | 57 +++++++++++++
 .../cleanup-stale-caches-nightly.yml          | 84 +++++++++++++++++++
 2 files changed, 141 insertions(+)
 create mode 100644 .github/workflows/cleanup-pr-caches.yml
 create mode 100644 .github/workflows/cleanup-stale-caches-nightly.yml

diff --git a/.github/workflows/cleanup-pr-caches.yml b/.github/workflows/cleanup-pr-caches.yml
new file mode 100644
index 000000000000..479f4deb27cf
--- /dev/null
+++ b/.github/workflows/cleanup-pr-caches.yml
@@ -0,0 +1,57 @@
+name: Cleanup PR caches
+
+# When a pull request closes (merged or not), delete every GitHub Actions
+# cache entry scoped to its merge or head ref. This reclaims the repo's
+# 10 GB cache budget within seconds of close, without adding any overhead
+# to the regular build/test runs.
+#
+# Reference:
+# https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries
+
+on:
+  # Use pull_request_target rather than pull_request: GITHUB_TOKEN is
+  # read-only for pull_request events from forks, which would cause every
+  # real-world PR cache delete to return 403. pull_request_target runs
+  # in the base-repo context with the permissions this workflow requests,
+  # and is safe here because the workflow never checks out or executes
+  # fork-authored code -- it only calls the GitHub API.
+  # https://docs.github.com/en/actions/security-guides/automatic-token-authentication#permissions-for-the-github_token
+  pull_request_target:
+    types: [closed]
+
+jobs:
+  purge-pr-caches:
+    runs-on: ubuntu-latest
+    permissions:
+      actions: write  # required to delete caches
+      contents: read
+    steps:
+      - name: Delete all caches for the closed PR
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          PR_NUM: ${{ github.event.pull_request.number }}
+        run: |
+          set -euo pipefail
+          # Sweep both the merge-ref and the head-ref. Most pull_request
+          # workflows cache against refs/pull/N/merge (github.sha resolves
+          # to the merge commit), but workflows that key on github.head_ref
+          # can write caches to refs/pull/N/head. Cleaning up both is
+          # belt-and-braces against future workflow additions.
+          total=0
+          for scope in merge head; do
+            ref="refs/pull/${PR_NUM}/${scope}"
+            echo "Purging caches for ${REPO} at ref=${ref}"
+            while read -r id; do
+              [ -z "$id" ] && continue
+              if gh api -X DELETE "repos/${REPO}/actions/caches/${id}" >/dev/null 2>&1; then
+                total=$((total + 1))
+                echo " deleted cache id=${id} (ref=${ref})"
+              else
+                echo " WARN: failed to delete cache id=${id} (ref=${ref})"
+              fi
+            done < <(gh api --paginate \
+              "repos/${REPO}/actions/caches?ref=${ref}&per_page=100" \
+              --jq '.actions_caches[].id')
+          done
+          echo "Purged ${total} cache entries across merge+head refs."
diff --git a/.github/workflows/cleanup-stale-caches-nightly.yml b/.github/workflows/cleanup-stale-caches-nightly.yml
new file mode 100644
index 000000000000..83c69846313a
--- /dev/null
+++ b/.github/workflows/cleanup-stale-caches-nightly.yml
@@ -0,0 +1,84 @@
+name: Cleanup stale caches (nightly sweep)
+
+# Safety net for the cleanup-on-close workflow: once per day, scan the
+# repository's GitHub Actions caches and purge any cache scoped to a
+# pull-request ref (refs/pull/N/...) whose PR has been closed for more
+# than a 3-day grace period. The grace period lets anyone spot-rerun a
+# just-merged PR before its caches vanish.
+#
+# This catches the edge cases the pull_request_target closed trigger misses:
+#   - PRs closed during a cleanup-workflow outage
+#   - caches orphaned when a PR was closed before this workflow existed
+#   - caches stuck on refs/pull/N/merge after branch deletion
+#
+# Reference:
+# https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries
+
+on:
+  schedule:
+    - cron: '0 6 * * *'  # 06:00 UTC daily
+  workflow_dispatch:
+
+jobs:
+  sweep:
+    runs-on: ubuntu-latest
+    permissions:
+      actions: write  # required to delete caches
+      pull-requests: read  # required to check PR state
+    steps:
+      - name: Purge caches for PRs closed more than 3 days ago
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          GRACE_DAYS: '3'
+        run: |
+          set -euo pipefail
+          CUTOFF=$(date -u -d "${GRACE_DAYS} days ago" +%s)
+          echo "Grace cutoff: PRs closed before $(date -u -d "@${CUTOFF}" --iso-8601=seconds)"
+
+          # Step 1: enumerate every PR-scoped cache (id + pr number).
+          tmpdir=$(mktemp -d)
+          trap 'rm -rf "$tmpdir"' EXIT
+          gh api --paginate \
+            "repos/${REPO}/actions/caches?per_page=100" \
+            --jq '.actions_caches[] |
+              select(.ref | startswith("refs/pull/")) |
+              [.id, (.ref | capture("refs/pull/(?<n>[0-9]+)/").n)] |
+              @tsv' > "${tmpdir}/caches.tsv"
+          total_scanned=$(wc -l < "${tmpdir}/caches.tsv")
+
+          # Step 2: one API call per *distinct* PR (not per cache).
+          awk '{print $2}' "${tmpdir}/caches.tsv" | sort -u > "${tmpdir}/prs.txt"
+          : > "${tmpdir}/prstate.tsv"
+          while read -r pr; do
+            info=$(gh pr view "$pr" --repo "$REPO" \
+              --json state,closedAt 2>/dev/null || echo '{}')
+            state=$(echo "$info" | jq -r '.state // "UNKNOWN"')
+            closed=$(echo "$info" | jq -r '.closedAt // "null"')
+            printf '%s\t%s\t%s\n' "$pr" "$state" "$closed" >> "${tmpdir}/prstate.tsv"
+          done < "${tmpdir}/prs.txt"
+
+          # Step 3: join caches with PR state and purge those past the grace cutoff.
+          total_purged=0
+          while read -r id pr; do
+            [ -z "$id" ] && continue
+            row=$(awk -v p="$pr" '$1 == p' "${tmpdir}/prstate.tsv")
+            state=$(echo "$row" | cut -f2)
+            closed=$(echo "$row" | cut -f3)
+            if [ "$state" = "OPEN" ] || [ "$closed" = "null" ]; then
+              continue
+            fi
+            closed_ts=$(date -u -d "$closed" +%s 2>/dev/null || echo 0)
+            [ "$closed_ts" -eq 0 ] && continue
+            if [ "$closed_ts" -lt "$CUTOFF" ]; then
+              if gh api -X DELETE "repos/${REPO}/actions/caches/${id}" >/dev/null 2>&1; then
+                total_purged=$((total_purged + 1))
+                echo " purged cache id=${id} (PR #${pr} ${state} since ${closed})"
+              else
+                echo " WARN: failed to delete cache id=${id} (PR #${pr})"
+              fi
+            fi
+          done < "${tmpdir}/caches.tsv"
+
+          distinct_prs=$(wc -l < "${tmpdir}/prs.txt")
+          echo "Scanned ${total_scanned} PR-scoped caches across ${distinct_prs} distinct PRs; purged ${total_purged}."