Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/cleanup-pr-caches.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
name: Cleanup PR caches

# When a pull request closes (merged or not), delete every GitHub Actions
# cache entry scoped to its merge ref (refs/pull/N/merge) or its head ref
# (refs/pull/N/head). This reclaims the repo's 10 GB cache budget within
# seconds of close, without adding any overhead to the regular build/test
# runs.
#
# Reference:
# https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries

on:
  # Use pull_request_target rather than pull_request: GITHUB_TOKEN is
  # read-only for pull_request events from forks, which would cause every
  # real-world PR cache delete to return 403. pull_request_target runs
  # in the base-repo context with the permissions this workflow requests,
  # and is safe here because the workflow never checks out or executes
  # fork-authored code -- it only calls the GitHub API.
  # https://docs.github.com/en/actions/security-guides/automatic-token-authentication#permissions-for-the-github_token
  pull_request_target:
    # Fires once when the PR is closed, whether it was merged or not.
    types: [closed]

jobs:
  # Single job: delete every Actions cache attached to the PR that just
  # closed. Runs entirely through the GitHub REST API via the gh CLI.
  purge-pr-caches:
    runs-on: ubuntu-latest
    permissions:
      actions: write  # required to delete caches
      contents: read
    steps:
      - name: Delete all caches for the closed PR
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          PR_NUM: ${{ github.event.pull_request.number }}
        run: |
          set -euo pipefail
          # Sweep both the merge-ref and the head-ref. Most pull_request
          # workflows cache against refs/pull/N/merge (github.sha resolves
          # to the merge commit), but workflows that key on github.head_ref
          # can write caches to refs/pull/N/head. Cleaning up both is
          # belt-and-braces against future workflow additions.
          total=0
          for scope in merge head; do
            ref="refs/pull/${PR_NUM}/${scope}"
            echo "Purging caches for ${REPO} at ref=${ref}"

            # Snapshot the complete id list BEFORE deleting anything.
            # Deleting while a paginated listing is still being fetched
            # shifts later pages and can skip entries. Capturing the
            # output also exposes a failed listing, which a process
            # substitution would silently hide.
            if ! ids=$(gh api --paginate \
                "repos/${REPO}/actions/caches?ref=${ref}&per_page=100" \
                --jq '.actions_caches[].id'); then
              echo "  WARN: could not list caches for ref=${ref}; skipping"
              continue
            fi

            while read -r id; do
              # An empty listing yields a single blank line; ignore it.
              [ -z "$id" ] && continue
              # Best effort: one failed delete must not abort the sweep,
              # but keep gh's stderr so the reason shows up in the log.
              if err=$(gh api -X DELETE "repos/${REPO}/actions/caches/${id}" 2>&1 >/dev/null); then
                total=$((total + 1))
                echo "  deleted cache id=${id} (ref=${ref})"
              else
                echo "  WARN: failed to delete cache id=${id} (ref=${ref}): ${err}"
              fi
            done <<< "$ids"
          done
          echo "Purged ${total} cache entries across merge+head refs."
84 changes: 84 additions & 0 deletions .github/workflows/cleanup-stale-caches-nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: Cleanup stale caches (nightly sweep)

# Safety net for the cleanup-on-close workflow: once per day, scan the
# repository's GitHub Actions caches and purge any cache scoped to a
# pull-request ref (any ref under refs/pull/N/, e.g. refs/pull/N/merge)
# whose PR has been closed for more than a 3-day grace period. The grace
# period lets anyone spot-rerun a just-merged PR before its caches vanish.
#
# This catches the edge cases the cleanup-on-close trigger
# (pull_request_target, types: [closed]) misses:
# - PRs closed during a cleanup-workflow outage
# - caches orphaned when a PR was closed before this workflow existed
# - caches stuck on refs/pull/N/merge after branch deletion
#
# Reference:
# https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries

on:
  schedule:
    # Once a day at 06:00 UTC.
    - cron: '0 6 * * *'
  # Also allow the sweep to be started manually.
  workflow_dispatch:

jobs:
  # Single job: list every PR-scoped cache, look up each PR once, and
  # delete the caches of PRs that closed before the grace cutoff.
  sweep:
    runs-on: ubuntu-latest
    permissions:
      actions: write  # required to delete caches
      pull-requests: read  # required to check PR state
    steps:
      - name: Purge caches for PRs closed more than 3 days ago
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          GRACE_DAYS: '3'
        run: |
          set -euo pipefail
          # Epoch seconds of "now minus GRACE_DAYS"; PRs closed before
          # this instant are eligible for purging.
          CUTOFF=$(date -u -d "${GRACE_DAYS} days ago" +%s)
          echo "Grace cutoff: PRs closed before $(date -u -d "@${CUTOFF}" --iso-8601=seconds)"

          # Step 1: enumerate every PR-scoped cache (id + pr number).
          # The full listing is written to disk before anything is
          # deleted, so deletions cannot disturb pagination.
          tmpdir=$(mktemp -d)
          trap 'rm -rf "$tmpdir"' EXIT
          gh api --paginate \
            "repos/${REPO}/actions/caches?per_page=100" \
            --jq '.actions_caches[] |
                  select(.ref | startswith("refs/pull/")) |
                  [.id, (.ref | capture("refs/pull/(?<n>[0-9]+)/").n)] |
                  @tsv' > "${tmpdir}/caches.tsv"
          total_scanned=$(wc -l < "${tmpdir}/caches.tsv")

          # Step 2: one API call per *distinct* PR (not per cache).
          # Results are kept in associative arrays keyed by PR number so
          # step 3 can look them up without spawning a process per cache.
          awk '{print $2}' "${tmpdir}/caches.tsv" | sort -u > "${tmpdir}/prs.txt"
          declare -A pr_state=() pr_closed=()
          while read -r pr; do
            [ -z "$pr" ] && continue
            # A failed lookup degrades to '{}', which maps to
            # state=UNKNOWN / closedAt=null and is skipped in step 3,
            # i.e. when in doubt the cache is kept.
            info=$(gh pr view "$pr" --repo "$REPO" \
              --json state,closedAt 2>/dev/null || echo '{}')
            pr_state["$pr"]=$(jq -r '.state // "UNKNOWN"' <<< "$info")
            pr_closed["$pr"]=$(jq -r '.closedAt // "null"' <<< "$info")
          done < "${tmpdir}/prs.txt"

          # Step 3: join caches with PR state and purge those past the grace cutoff.
          total_purged=0
          while IFS=$'\t' read -r id pr; do
            if [ -z "$id" ] || [ -z "$pr" ]; then
              continue
            fi
            state=${pr_state["$pr"]:-UNKNOWN}
            closed=${pr_closed["$pr"]:-null}
            # Keep caches of open PRs and of PRs with no close timestamp.
            if [ "$state" = "OPEN" ] || [ "$closed" = "null" ]; then
              continue
            fi
            # An unparseable timestamp becomes 0 and is treated as "keep".
            closed_ts=$(date -u -d "$closed" +%s 2>/dev/null || echo 0)
            [ "$closed_ts" -eq 0 ] && continue
            if [ "$closed_ts" -lt "$CUTOFF" ]; then
              # Best effort: a failed delete is logged with gh's stderr
              # and the sweep carries on with the next cache.
              if err=$(gh api -X DELETE "repos/${REPO}/actions/caches/${id}" 2>&1 >/dev/null); then
                total_purged=$((total_purged + 1))
                echo "  purged cache id=${id} (PR #${pr} ${state} since ${closed})"
              else
                echo "  WARN: failed to delete cache id=${id} (PR #${pr}): ${err}"
              fi
            fi
          done < "${tmpdir}/caches.tsv"

          distinct_prs=$(wc -l < "${tmpdir}/prs.txt")
          echo "Scanned ${total_scanned} PR-scoped caches across ${distinct_prs} distinct PRs; purged ${total_purged}."
Loading