Skip to content

Commit bfd7572

Browse files
BUILD-10724: Import GitHub cache to S3 when no S3 cache exists (migration mode)
When the S3 backend is used and the S3 cache misses, automatically attempt to restore the cache from GitHub using the original unprefixed key. The S3 post-job step will then save the restored content to S3, pre-provisioning it for subsequent runs.

The feature is enabled by default. Resolution order to disable it:
1. Action input `import-github-cache: 'false'`
2. Environment variable `CACHE_IMPORT_GITHUB=false`
3. Default: true

`fail-on-cache-miss` and `lookup-only` are propagated to the GitHub fallback step. When `fail-on-cache-miss` is set and import mode is active, failure is deferred until both S3 and GitHub have been tried.

Also adds `.github/workflows/check-cache-migration.yml`: a manually triggered workflow that compares GitHub cache entries to S3 objects across target branches (main, master, branch-*, dogfood-on-*, feature/long/*), ignoring transient keys (build-number-*, mise-*). When 100% of entries are found in S3, it automatically sets the `CACHE_IMPORT_GITHUB=false` repository variable to disable the import fallback. This only takes effect if the `CACHE_IMPORT_GITHUB` environment variable is set from the repository variable via `${{ vars.CACHE_IMPORT_GITHUB }}`.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e1bd24c commit bfd7572

4 files changed

Lines changed: 473 additions & 22 deletions

File tree

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
---
2+
# Manually dispatched workflow that compares the repository's GitHub Actions
# cache entries with the objects present in the S3 cache bucket.
name: Check cache migration (GitHub → S3)
3+
4+
# Only runs on demand; there is no push or schedule trigger.
on:
5+
workflow_dispatch:
6+
inputs:
7+
# Selects the cache environment; the value is passed to the credential
# setup step and is part of the S3 bucket name used by the job.
environment:
8+
description: Cache environment to check
9+
required: true
10+
default: prod
11+
type: choice
12+
options:
13+
- prod
14+
- dev
15+
16+
jobs:
17+
# Single job: authenticate against the S3 cache, list both cache stores,
# compare them, and flip the opt-out variable when everything is migrated.
check-migration:
18+
runs-on: ubuntu-latest
19+
name: Compare GitHub cache vs S3
20+
permissions:
21+
# presumably lets the credential-setup action request an OIDC token — TODO confirm
id-token: write
22+
contents: read
23+
# NOTE(review): the later steps call the caches and repository-variables REST
# endpoints with github.token. Confirm that `actions: write` is enough for the
# variables endpoints; the `permissions:` key has no dedicated scope for
# repository variables, so a different token may be needed there.
actions: write # required to list GitHub cache entries and set the opt-out repository variable
24+
25+
steps:
26+
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
27+
28+
# The outputs of this step (id: aws-auth) provide the AWS credentials used
# by the S3 listing step further down.
- name: Setup S3 cache credentials
29+
id: aws-auth
30+
uses: SonarSource/gh-action_cache/credential-setup@v1
31+
with:
32+
environment: ${{ inputs.environment }}
33+
34+
- name: List GitHub cache entries
  id: gh-caches
  shell: bash
  env:
    GITHUB_TOKEN: ${{ github.token }}
    GITHUB_REPOSITORY: ${{ github.repository }}
  run: |
    # Walk the paginated "list caches" endpoint; a page shorter than the
    # requested page size marks the end of the listing.
    page=1
    page_size=100
    collected="[]"
    while true; do
      body=$(curl -s -f \
        -H "Authorization: token $GITHUB_TOKEN" \
        -H "Accept: application/vnd.github+json" \
        "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/caches?per_page=${page_size}&page=${page}")
      batch=$(jq '.actions_caches' <<< "$body")
      batch_len=$(jq 'length' <<< "$batch")
      # Concatenate the running array with the page just fetched.
      collected=$(jq -s 'add' <<< "$collected $batch")
      if [[ "$batch_len" -lt "$page_size" ]]; then
        break
      fi
      page=$((page + 1))
    done

    # Keep entries on the target branches only and drop transient key
    # families (build-number-*, mise-*); retain just ref and key.
    selected=$(jq 'map(
      select(
        (.ref | test("^refs/heads/(main|master|branch-.+|dogfood-on-.+|feature/long/.+)$"))
        and (.key | test("^(build-number-|mise-)") | not)
      )
      | {ref, key}
    )' <<< "$collected")

    total=$(jq 'length' <<< "$collected")
    included=$(jq 'length' <<< "$selected")
    echo "Total GitHub cache entries: $total"
    echo "Included for migration check: $included"

    # Hand the filtered list to the comparison step through a file.
    printf '%s\n' "$selected" > /tmp/gh_caches.json
    echo "included-count=$included" >> "$GITHUB_OUTPUT"
77+
78+
- name: List S3 cache objects
  id: s3-objects
  shell: bash
  env:
    AWS_ACCESS_KEY_ID: ${{ steps.aws-auth.outputs.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ steps.aws-auth.outputs.AWS_SECRET_ACCESS_KEY }}
    AWS_SESSION_TOKEN: ${{ steps.aws-auth.outputs.AWS_SESSION_TOKEN }}
    AWS_DEFAULT_REGION: eu-central-1
    S3_BUCKET: sonarsource-s3-cache-${{ inputs.environment }}-bucket
  run: |
    # List every object in the bucket (the CLI paginates internally) and
    # write the sorted object keys, one per line, to /tmp/s3_keys.txt.
    #
    # Each `aws s3 ls` line is "<date> <time> <size> <key>". Strip only the
    # three leading columns instead of printing field 4, so that keys
    # containing whitespace are kept whole rather than cut at the first space.
    aws s3 ls "s3://$S3_BUCKET/" --recursive \
      | awk '{ sub(/^[^ ]+ +[^ ]+ +[^ ]+ /, ""); print }' \
      | sort > /tmp/s3_keys.txt

    S3_COUNT=$(wc -l < /tmp/s3_keys.txt)
    echo "Total S3 cache objects: $S3_COUNT"
    echo "s3-count=$S3_COUNT" >> "$GITHUB_OUTPUT"
96+
97+
- name: Compare and report
  id: compare
  shell: bash
  env:
    GITHUB_TOKEN: ${{ github.token }}
    GITHUB_REPOSITORY: ${{ github.repository }}
  run: |
    # Inputs: /tmp/gh_caches.json (filtered GitHub entries) and
    # /tmp/s3_keys.txt (one S3 object key per line).
    total=$(jq 'length' /tmp/gh_caches.json)
    found=0
    absent=()

    while IFS= read -r item; do
      ref=$(jq -r '.ref' <<< "$item")
      key=$(jq -r '.key' <<< "$item")
      # An entry counts as migrated when S3 holds either "<full ref>/<key>"
      # (refs/heads/main/{key}) or "<branch>/<key>" (feat/someBranch/{key});
      # the form used at save time is not known here, so accept both.
      branch="${ref#refs/heads/}"
      full_form="${ref}/${key}"
      short_form="${branch}/${key}"
      if grep -qxF -e "$full_form" -e "$short_form" /tmp/s3_keys.txt; then
        found=$((found + 1))
      else
        absent+=("$short_form")
      fi
    done < <(jq -c '.[]' /tmp/gh_caches.json)

    echo ""
    echo "========================================="
    echo " Migration status: $found / $total"
    echo "========================================="

    if [[ "${#absent[@]}" -gt 0 ]]; then
      echo ""
      echo "Missing in S3 (${#absent[@]} entries):"
      printf ' %s\n' "${absent[@]}"
    fi

    # Complete only when at least one entry was checked and none is missing.
    complete=false
    if [[ "$total" -gt 0 && "$found" -eq "$total" ]]; then
      echo ""
      echo "All included GitHub cache entries are present in S3."
      complete=true
    fi
    echo "migration-complete=$complete" >> "$GITHUB_OUTPUT"
141+
142+
- name: Set CACHE_IMPORT_GITHUB=false (migration complete)
  if: steps.compare.outputs.migration-complete == 'true'
  shell: bash
  env:
    GITHUB_TOKEN: ${{ github.token }}
    GITHUB_REPOSITORY: ${{ github.repository }}
  run: |
    # Create or update the CACHE_IMPORT_GITHUB repository variable with the
    # value "false" once the comparison step reported a complete migration.
    #
    # NOTE(review): confirm that github.token is allowed to write repository
    # variables; if the API answers 403, a token with variable write access
    # has to be supplied instead.
    VARIABLE_NAME="CACHE_IMPORT_GITHUB"
    VARIABLES_URL="https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/variables"
    PAYLOAD='{"name":"'"$VARIABLE_NAME"'","value":"false"}'

    # Look the variable up first: 200 means it exists, 404 means it does not.
    STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token $GITHUB_TOKEN" \
      -H "Accept: application/vnd.github+json" \
      "${VARIABLES_URL}/${VARIABLE_NAME}")

    case "$STATUS" in
      200)
        # Update existing variable
        curl -s -f -X PATCH \
          -H "Authorization: token $GITHUB_TOKEN" \
          -H "Accept: application/vnd.github+json" \
          "${VARIABLES_URL}/${VARIABLE_NAME}" \
          -d "$PAYLOAD"
        echo "Updated repository variable $VARIABLE_NAME=false"
        ;;
      404)
        # Create new variable
        curl -s -f -X POST \
          -H "Authorization: token $GITHUB_TOKEN" \
          -H "Accept: application/vnd.github+json" \
          "${VARIABLES_URL}" \
          -d "$PAYLOAD"
        echo "Created repository variable $VARIABLE_NAME=false"
        ;;
      *)
        # Any other status (e.g. 401/403/5xx) is a lookup failure and must
        # not be mistaken for "variable does not exist".
        echo "::error::Unexpected HTTP status $STATUS while looking up repository variable $VARIABLE_NAME" >&2
        exit 1
        ;;
    esac

    echo ""
    echo "Migration complete — CACHE_IMPORT_GITHUB set to false"
    echo "Import fallback will be disabled on next workflow runs."
    echo "To re-enable migration mode, delete or set CACHE_IMPORT_GITHUB=true in repository variables."
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
---
2+
# Exercises the GitHub → S3 cache import fallback of the action in this
# repository (the jobs below reference it as `uses: ./`).
name: Test cache migration (GitHub → S3)
3+
4+
# Runs on pushes to master, on every pull request, and on manual dispatch.
on:
5+
push:
6+
branches: [ master ]
7+
pull_request:
8+
workflow_dispatch:
9+
10+
jobs:
11+
# Prerequisite: provision GitHub cache entries used by the test scenarios
12+
provision-github-cache:
  name: "Provision GitHub cache entries"
  runs-on: github-ubuntu-latest-s
  permissions:
    contents: read
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
    # Entry restored by the import scenarios (key: test-migration-gh).
    - name: Save GitHub cache entry (for import scenario)
      run: |
        mkdir -p ~/.cache/test-migration
        echo "github-content" > ~/.cache/test-migration/test-file.txt
    - uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
      with:
        path: ~/.cache/test-migration
        key: test-migration-gh
    # Same content saved under the key that also exists in S3, so the S3-hit
    # scenario can tell which backend the restored file came from.
    - name: Save GitHub cache entry (for S3-hit scenario, to confirm it is NOT restored)
      run: |
        echo "github-content" > ~/.cache/test-migration/test-file.txt
    - uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
      with:
        path: ~/.cache/test-migration
        key: test-migration-s3hit
31+
32+
# Prerequisite: provision an S3 cache entry for the S3-hit scenario
33+
provision-s3-cache:
  name: "Provision S3 cache entry"
  runs-on: github-ubuntu-latest-s
  permissions:
    id-token: write
    contents: read
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
    # The marker text "s3-content" lets the S3-hit scenario distinguish this
    # entry from the GitHub one stored under the same key.
    - name: Create S3 cache content
      run: |
        mkdir -p ~/.cache/test-migration
        echo "s3-content" > ~/.cache/test-migration/test-file.txt
    - name: Save to S3 cache
      uses: ./
      with:
        path: ~/.cache/test-migration
        key: test-migration-s3hit
        environment: dev
        backend: s3
50+
51+
# Scenario 1: S3 backend with import mode active (default behavior after migration begins)
52+
test-s3-import-enabled:
  needs: provision-github-cache
  runs-on: github-ubuntu-latest-s
  name: "S3 + import enabled (migration fallback)"
  permissions:
    id-token: write
    contents: read
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
    - name: Cache with migration fallback
      id: cache
      uses: ./
      with:
        path: ~/.cache/test-migration
        key: test-migration-gh
        environment: dev
        backend: s3
        # import-github-cache defaults to true — no need to set it explicitly
    # Expect a hit whose content is the file saved to the GitHub cache.
    - name: Verify import succeeded
      env:
        # Pass the step output through the environment instead of
        # interpolating the expression into the script text.
        CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
      run: |
        [[ "$CACHE_HIT" == "true" ]] || { echo "ERROR: cache-hit is not true"; exit 1; }
        [[ "$(cat ~/.cache/test-migration/test-file.txt)" == "github-content" ]] || { echo "ERROR: unexpected content, not restored from GitHub"; exit 1; }
        rm -rf ~/.cache/test-migration # prevent saving to S3 so other scenarios don't find it
75+
76+
# Scenario 2: S3 backend with import mode explicitly disabled
77+
test-s3-import-disabled:
  needs: provision-github-cache
  runs-on: github-ubuntu-latest-s
  name: "S3 + import disabled (migration complete)"
  permissions:
    id-token: write
    contents: read
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
    - name: Cache without migration fallback
      id: cache
      uses: ./
      with:
        path: ~/.cache/test-migration
        key: test-migration-gh
        environment: dev
        backend: s3
        # Opt out of the GitHub fallback through the action input.
        import-github-cache: 'false'
    # Expect a miss and no restored file, since the fallback is disabled.
    - name: Verify no import from GitHub
      env:
        # Pass the step output through the environment instead of
        # interpolating the expression into the script text.
        CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
      run: |
        [[ "$CACHE_HIT" != "true" ]] || { echo "ERROR: cache-hit is true but import should be disabled"; exit 1; }
        test ! -f ~/.cache/test-migration/test-file.txt || { echo "ERROR: cache content was restored but import should be disabled"; exit 1; }
99+
100+
# Scenario 3: S3 backend with import disabled via env var (repo variable pattern)
101+
test-s3-import-disabled-via-env:
  needs: provision-github-cache
  runs-on: github-ubuntu-latest-s
  name: "S3 + import disabled via env var"
  permissions:
    id-token: write
    contents: read
  env:
    # Opt out of the GitHub fallback through the environment variable
    # rather than the action input.
    CACHE_IMPORT_GITHUB: 'false'
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
    - name: Cache without migration fallback (via env)
      id: cache
      uses: ./
      with:
        path: ~/.cache/test-migration
        key: test-migration-gh
        environment: dev
        backend: s3
    # Expect a miss and no restored file, since the fallback is disabled.
    - name: Verify no import from GitHub
      env:
        # Pass the step output through the environment instead of
        # interpolating the expression into the script text.
        CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
      run: |
        [[ "$CACHE_HIT" != "true" ]] || { echo "ERROR: cache-hit is true but import should be disabled"; exit 1; }
        test ! -f ~/.cache/test-migration/test-file.txt || { echo "ERROR: cache content was restored but import should be disabled"; exit 1; }
124+
125+
# Scenario 4: S3 hit — GitHub import step must be skipped entirely
126+
test-s3-hit-skips-github-import:
  needs: [ provision-github-cache, provision-s3-cache ]
  runs-on: github-ubuntu-latest-s
  name: "S3 hit → no GitHub import"
  permissions:
    id-token: write
    contents: read
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
    - name: Cache (S3 hit expected, import enabled)
      id: cache
      uses: ./
      with:
        path: ~/.cache/test-migration
        key: test-migration-s3hit
        environment: dev
        backend: s3
        # import-github-cache defaults to true — S3 hit should prevent GitHub from being tried
    # The same key exists in both backends with different marker content;
    # "s3-content" shows the file came from S3.
    - name: Verify S3 content restored, not GitHub
      env:
        # Pass the step output through the environment instead of
        # interpolating the expression into the script text.
        CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
      run: |
        [[ "$CACHE_HIT" == "true" ]] || { echo "ERROR: expected S3 hit"; exit 1; }
        [[ "$(cat ~/.cache/test-migration/test-file.txt)" == "s3-content" ]] || { echo "ERROR: unexpected content — GitHub import may have overridden S3"; exit 1; }

0 commit comments

Comments
 (0)