Skip to content

Commit 64c71fa

Browse files
authored
🚀 Release: beta → master (#69)
* feat(deploy): enhance deployment scripts and health checks for Docker containers * feat(ci): enhance CodeQL workflows and add VPS readiness check for deployment
1 parent 37ec9a2 commit 64c71fa

10 files changed

Lines changed: 731 additions & 87 deletions

.github/workflows/codeql-deep.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ permissions:
2929
security-events: write
3030

3131
jobs:
32-
analyze-deep:
32+
# Job name MUST stay "codeql-deep" — deploy.yml (via workflow_run) depends on this scan's status
33+
# check, and branch protection on master references it as:
34+
# "CodeQL — Deep Scan (post-merge) / codeql-deep"
35+
codeql-deep:
3336
name: Deep Analyze (CodeQL)
3437
runs-on: ubuntu-latest
3538
timeout-minutes: 40
@@ -68,9 +71,6 @@ jobs:
6871
uses: github/codeql-action/analyze@v4
6972
with:
7073
category: "codeql-deep"
71-
# Upload unconditionally — results land in the Security tab regardless
72-
# of whether any alerts are found.
73-
upload: always
7474

7575
- name: Write deep-scan summary
7676
if: always()

.github/workflows/codeql.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ permissions:
1818
security-events: write
1919

2020
jobs:
21-
analyze:
22-
name: Analyze (CodeQL)
21+
# Job name MUST stay "codeql-lite" — branch protection references this exact
22+
# status check: "CodeQL — PR Scan (lightweight) / codeql-lite"
23+
codeql-lite:
24+
name: CodeQL Lite (PR)
2325
runs-on: ubuntu-latest
2426
timeout-minutes: 15
2527

@@ -59,4 +61,4 @@ jobs:
5961
- name: Perform CodeQL Analysis
6062
uses: github/codeql-action/analyze@v4
6163
with:
62-
category: "codeql-pr"
64+
category: "codeql-lite"

.github/workflows/deploy.yml

Lines changed: 160 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
# Production Deployment Pipeline
44
#
55
# Design principles:
6-
# 1. Triggers on every push to master (no paths filter — ensures sync-beta always runs)
6+
# 1. Triggered ONLY after CodeQL deep scan completes successfully — no polling, no race.
7+
# Uses workflow_run event: deploy is event-driven, not concurrent with security scan.
78
# 2. Runs ALL validation from scratch — no trust built on PR results alone
89
# 3. Trivy scan runs BEFORE Docker push — vulnerable images never reach the registry
910
# 4. target: production + build-args mirror pr.yml exactly (bit-for-bit parity)
@@ -12,18 +13,33 @@
1213
# 7. timeout-minutes on every job — hung processes never block CI indefinitely
1314
# 8. npm ci retried up to 3x — registry flakiness never kills a valid deploy
1415
#
15-
# Parallel stages:
16-
# validate ─┐
17-
# test-api ├─► build-scan-push ─► deploy ─► api-health-gate ─► sync-infra ─► sync-monitoring ─► health-and-smoke
18-
# ┘ │
19-
# rollback ◄────────────┘ (on failure)
16+
# Pipeline order:
17+
# codeql-gate
18+
# ├─► validate ─┐
19+
# └─► test-api ├─► build-scan-push ─► vps-readiness-check ─► deploy
20+
# ┘ │
21+
# api-health-gate ◄────────┘
22+
# │
23+
# sync-infra ─► sync-monitoring ─► health-and-smoke
24+
# │
25+
# rollback ◄──────────────────────────────┘ (on failure)
2026

2127
name: Deploy to Production
2228

2329
on:
24-
push:
30+
# Triggered ONLY when the CodeQL deep scan workflow completes on master.
31+
# This replaces the previous push trigger + polling approach:
32+
# - No race conditions (workflow_run fires AFTER codeql-deep finishes)
33+
# - No API polling loops or timing-dependent checks
34+
# - Deployment is blocked by the codeql-gate job if CodeQL did not succeed (the event itself fires on any conclusion)
35+
workflow_run:
36+
workflows: ["CodeQL β€” Deep Scan (post-merge)"]
37+
types:
38+
- completed
2539
branches:
2640
- master
41+
# Manual dispatch retained for emergency/hotfix deploys.
42+
# The codeql-gate job enforces the conclusion check only for workflow_run.
2743
workflow_dispatch:
2844

2945
# Never cancel an in-progress deployment — let it finish or fail cleanly.
@@ -36,6 +52,56 @@ permissions:
3652
contents: read
3753

3854
jobs:
55+
# ---------------------------------------------------------------------------
56+
# JOB: codeql-gate
57+
#
58+
# First job in every deploy run. Two responsibilities:
59+
#
60+
# 1. SECURITY GATE (workflow_run only):
61+
# Reads github.event.workflow_run.conclusion and fails hard if CodeQL
62+
# did not pass. This makes the event-driven guarantee explicit and
63+
# visible in the pipeline UI.
64+
#
65+
# 2. SHA RESOLUTION:
66+
# On workflow_run, github.sha = HEAD of default branch at event time,
67+
# NOT the commit that triggered CodeQL. We must deploy exactly the SHA
68+
# that was scanned. Exports deploy_sha = github.event.workflow_run.head_sha
69+
# so all downstream jobs checkout and tag the correct commit.
70+
# On workflow_dispatch, deploy_sha = github.sha (HEAD of triggered branch).
71+
#
72+
# All subsequent jobs that do git checkout use ref: needs.codeql-gate.outputs.deploy_sha.
73+
# ---------------------------------------------------------------------------
74+
codeql-gate:
  name: CodeQL Security Gate
  runs-on: ubuntu-latest
  timeout-minutes: 5
  outputs:
    # Commit that every downstream job checks out and tags.
    deploy_sha: ${{ steps.sha.outputs.deploy_sha }}
  steps:
    # Resolve the commit to deploy:
    #   - workflow_run      -> the commit the CodeQL run scanned (head_sha)
    #   - workflow_dispatch -> github.sha of the dispatched ref
    # Context values are handed to the shell through env instead of being
    # interpolated into the script text, so they are always treated as data.
    - name: Resolve deploy SHA
      id: sha
      env:
        EVENT_NAME: ${{ github.event_name }}
        SCANNED_SHA: ${{ github.event.workflow_run.head_sha }}
        CURRENT_SHA: ${{ github.sha }}
      run: |
        if [ "$EVENT_NAME" = "workflow_run" ]; then
          DEPLOY_SHA="$SCANNED_SHA"
        else
          DEPLOY_SHA="$CURRENT_SHA"
        fi
        # An empty ref would make downstream checkouts fall back to their
        # default ref instead of the intended commit, so fail loudly here.
        if [ -z "$DEPLOY_SHA" ]; then
          echo "::error::Could not resolve the commit SHA to deploy (event=$EVENT_NAME)."
          exit 1
        fi
        echo "deploy_sha=$DEPLOY_SHA" >> "$GITHUB_OUTPUT"

    # Security gate: only enforced for workflow_run. Manual dispatches skip
    # this step entirely via the step-level `if`.
    - name: Verify CodeQL deep scan passed
      if: github.event_name == 'workflow_run'
      env:
        CONCLUSION: ${{ github.event.workflow_run.conclusion }}
        SHA: ${{ github.event.workflow_run.head_sha }}
        REPOSITORY: ${{ github.repository }}
      run: |
        echo "CodeQL deep scan conclusion : $CONCLUSION"
        echo "Scanned commit SHA : $SHA"
        if [ "$CONCLUSION" != "success" ]; then
          echo "::error::CodeQL deep scan did not pass (conclusion=$CONCLUSION)."
          echo " Deployment is blocked. Review findings before retrying:"
          echo " https://github.com/$REPOSITORY/security/code-scanning"
          exit 1
        fi
        echo "✓ CodeQL gate passed — safe to deploy SHA $SHA"
104+
39105
# ---------------------------------------------------------------------------
40106
# JOB: validate
41107
#
@@ -45,6 +111,7 @@ jobs:
45111
validate:
46112
name: Validate (typecheck + audit)
47113
runs-on: ubuntu-latest
114+
needs: [codeql-gate]
48115
timeout-minutes: 10
49116
steps:
50117
- name: Confirm deployment trigger
@@ -58,27 +125,10 @@ jobs:
58125
59126
- name: Checkout
60127
uses: actions/checkout@v5
61-
62-
- name: Setup Node.js 24
63-
uses: actions/setup-node@v5
64128
with:
65-
node-version: '24'
66-
cache: npm
67-
cache-dependency-path: package-lock.json
68-
69-
- name: Install dependencies (with retry)
70-
run: |
71-
echo "::group::npm ci"
72-
for attempt in 1 2 3; do
73-
npm ci && break
74-
[ $attempt -eq 3 ] && { echo "::error::npm ci failed after 3 attempts"; exit 1; }
75-
echo "Attempt $attempt failed β€” retrying in 15s..."
76-
sleep 15
77-
done
78-
echo "::endgroup::"
129+
ref: ${{ needs.codeql-gate.outputs.deploy_sha }}
79130

80-
- name: Dependency vulnerability scan
81-
run: npm audit --omit=dev --audit-level=high
131+
- name: Setup Node.js 24
82132

83133
- name: TypeScript check
84134
run: npm run typecheck
@@ -102,6 +152,7 @@ jobs:
102152
test-api:
103153
name: API Tests (unit + integration)
104154
runs-on: ubuntu-latest
155+
needs: [codeql-gate]
105156
timeout-minutes: 15
106157
env:
107158
SUPABASE_URL: ${{ secrets.SUPABASE_URL_TEST }}
@@ -110,6 +161,8 @@ jobs:
110161
steps:
111162
- name: Checkout
112163
uses: actions/checkout@v5
164+
with:
165+
ref: ${{ needs.codeql-gate.outputs.deploy_sha }}
113166

114167
- name: Setup Node.js 24
115168
uses: actions/setup-node@v5
@@ -152,22 +205,29 @@ jobs:
152205
build-scan-push:
153206
name: Build, Scan & Push Docker Image
154207
runs-on: ubuntu-latest
155-
needs: [validate, test-api]
208+
needs: [codeql-gate, validate, test-api]
156209
timeout-minutes: 25
157210
permissions:
158211
contents: read
159212
packages: write
160213
security-events: write
161214
outputs:
162-
sha_short: ${{ steps.meta.outputs.sha_short }}
163-
digest: ${{ steps.digest.outputs.digest }}
215+
sha_short: ${{ steps.meta.outputs.sha_short }}
216+
digest: ${{ steps.digest.outputs.digest }}
217+
deploy_sha: ${{ steps.meta.outputs.deploy_sha }}
164218
steps:
165219
- name: Checkout
166220
uses: actions/checkout@v5
221+
with:
222+
ref: ${{ needs.codeql-gate.outputs.deploy_sha }}
167223

168224
- name: Extract commit SHA
169225
id: meta
170-
run: echo "sha_short=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
226+
env:
227+
DEPLOY_SHA: ${{ needs.codeql-gate.outputs.deploy_sha }}
228+
run: |
229+
echo "sha_short=${DEPLOY_SHA::7}" >> "$GITHUB_OUTPUT"
230+
echo "deploy_sha=$DEPLOY_SHA" >> "$GITHUB_OUTPUT"
171231
172232
- name: Set up Docker Buildx
173233
uses: docker/setup-buildx-action@v3
@@ -489,17 +549,59 @@ jobs:
489549
echo "| Registry | ghcr.io/${{ github.repository_owner }}/api |"
490550
} >> "$GITHUB_STEP_SUMMARY"
491551
552+
# ---------------------------------------------------------------------------
553+
# JOB: vps-readiness-check
554+
#
555+
# Validates the VPS is in a deployable state BEFORE running the deploy.
556+
# Runs AFTER build-scan-push and BEFORE deploy (deploy depends on build-scan-push and vps-readiness-check).
557+
# Both must succeed before deploy is allowed to proceed.
558+
#
559+
# Delegates to scripts/vps-readiness-check.sh which checks:
560+
# - Docker daemon running
561+
# - api_network exists (auto-created if missing)
562+
# - Ports 80/443 free from non-nginx processes
563+
# - No API containers with host port bindings
564+
# - Required .env file present
565+
# - Runtime directories present (auto-created if missing)
566+
# - Sufficient disk space (auto-prunes if borderline)
567+
# ---------------------------------------------------------------------------
568+
vps-readiness-check:
  name: VPS Readiness Gate
  runs-on: ubuntu-latest
  needs: [build-scan-push]
  timeout-minutes: 10
  steps:
    # Runs scripts/vps-readiness-check.sh on the VPS over SSH. The job fails
    # if the deploy root is missing, the script is missing, or the script
    # exits non-zero (set -euo pipefail).
    - name: Run VPS readiness check via SSH
      uses: appleboy/ssh-action@v1.0.3
      with:
        host: ${{ secrets.DO_HOST }}
        username: ${{ secrets.DO_USER }}
        key: ${{ secrets.DO_SSH_KEY }}
        script: |
          set -euo pipefail
          export DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
          [ -d "$DEPLOY_ROOT" ] || { echo "❌ DEPLOY_ROOT not found: $DEPLOY_ROOT — run vps-setup.sh first"; exit 1; }
          cd "$DEPLOY_ROOT"
          # Pull latest scripts without full deploy
          git fetch origin master --depth=1
          # Refreshing the script stays best-effort, but a failure is now
          # logged instead of being silently discarded.
          if ! git checkout origin/master -- scripts/vps-readiness-check.sh; then
            echo "WARNING: could not refresh scripts/vps-readiness-check.sh from origin/master — using the copy already on disk"
          fi
          # Fail with a clear message rather than an opaque chmod error.
          [ -f scripts/vps-readiness-check.sh ] || { echo "❌ scripts/vps-readiness-check.sh not found in $DEPLOY_ROOT"; exit 1; }
          chmod +x scripts/vps-readiness-check.sh
          ./scripts/vps-readiness-check.sh
590+
492591
# ---------------------------------------------------------------------------
493592
# JOB: deploy
494593
#
495594
# Blue-Green deployment to VPS via SSH.
496595
# The deploy-bluegreen.sh script manages slot switching and container health.
596+
#
597+
# DEPENDENCY GATES (both must pass):
598+
# - build-scan-push: image was built, scanned and pushed; vps-readiness-check: ensures VPS can accept the deployment
497599
# ---------------------------------------------------------------------------
498600
deploy:
499601
name: Deploy (Blue-Green SSH)
500602
runs-on: ubuntu-latest
501-
needs: [build-scan-push]
502-
timeout-minutes: 15
603+
needs: [build-scan-push, vps-readiness-check]
604+
timeout-minutes: 20
503605
steps:
504606
- name: Validate required deployment secrets
505607
env:
@@ -548,8 +650,10 @@ jobs:
548650
ls -la "$HOME/api"
549651
[ -d "$DEPLOY_ROOT" ] || { echo "❌ DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
550652
cd "$DEPLOY_ROOT"
653+
# Pin repo to the exact SHA that was built and scanned by CodeQL.
654+
# Prevents stale scripts from running if concurrent commits landed.
551655
git fetch origin
552-
git reset --hard origin/master
656+
git reset --hard ${{ needs.build-scan-push.outputs.deploy_sha }}
553657
chmod +x scripts/*.sh
554658
echo "=== Pre-deploy environment validation ==="
555659
./scripts/validate-env.sh --check-monitoring
@@ -573,6 +677,10 @@ jobs:
573677
ls -la "$DEPLOY_ROOT"
574678
[ -d "$DEPLOY_ROOT" ] || { echo "❌ DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
575679
cd "$DEPLOY_ROOT"
680+
# Enforce repo is at the exact SHA being deployed (issue 7 — prevents
681+
# stale deploy scripts if another commit landed during this pipeline run).
682+
git fetch origin
683+
git reset --hard ${{ needs.build-scan-push.outputs.deploy_sha }}
576684
chmod +x scripts/*.sh
577685
# Environment already validated in previous step
578686
./scripts/deploy-bluegreen.sh "${{ needs.build-scan-push.outputs.sha_short }}"
@@ -587,14 +695,24 @@ jobs:
587695
key: ${{ secrets.DO_SSH_KEY }}
588696
script: |
589697
ACTIVE_SLOT=$(cat /var/run/api/active-slot 2>/dev/null || echo "unknown")
698+
ACTIVE_CONTAINER="api-${ACTIVE_SLOT}"
590699
DEPLOY_STATUS="UNKNOWN"
591-
592-
# Check if health endpoint is responding (good sign of successful deploy)
593-
if timeout 5 curl -sf http://127.0.0.1:3000/health >/dev/null 2>&1; then
594-
DEPLOY_STATUS="SUCCESS"
700+
701+
# Health check via docker exec — NO host port binding required.
702+
# api containers live only on api_network; localhost:3000 here means
703+
# the container's own loopback (executed via docker exec).
704+
if docker inspect "$ACTIVE_CONTAINER" >/dev/null 2>&1; then
705+
if docker exec "$ACTIVE_CONTAINER" \
706+
curl -sf --max-time 5 "http://localhost:3000/health" >/dev/null 2>&1; then
707+
DEPLOY_STATUS="SUCCESS"
708+
else
709+
DEPLOY_STATUS="UNHEALTHY"
710+
fi
711+
else
712+
DEPLOY_STATUS="CONTAINER_MISSING"
595713
fi
596-
597-
echo "DEPLOY_STATE=$DEPLOY_STATUS| SLOT=$ACTIVE_SLOT | SHA=${{ github.sha }}"
714+
715+
echo "DEPLOY_STATE=$DEPLOY_STATUS | SLOT=$ACTIVE_SLOT | CONTAINER=$ACTIVE_CONTAINER | SHA=${{ github.sha }}"
598716
599717
# ---------------------------------------------------------------------------
600718
# JOB: api-health-gate (Step E+)
@@ -923,11 +1041,12 @@ jobs:
9231041
rollback:
9241042
name: Rollback Deployment (auto)
9251043
runs-on: ubuntu-latest
926-
needs: [deploy, api-health-gate, sync-infra, sync-monitoring, health-and-smoke]
1044+
needs: [vps-readiness-check, deploy, api-health-gate, sync-infra, sync-monitoring, health-and-smoke]
9271045
timeout-minutes: 10
9281046
if: |
9291047
always() &&
9301048
(
1049+
needs.vps-readiness-check.result == 'failure' ||
9311050
needs.deploy.result == 'failure' ||
9321051
needs.api-health-gate.result == 'failure' ||
9331052
needs.sync-infra.result == 'failure' ||
@@ -938,6 +1057,7 @@ jobs:
9381057
- name: Log rollback trigger
9391058
run: |
9401059
echo "ROLLBACK_TRIGGERED=TRUE | FAILED_JOBS:"
1060+
[ "${{ needs.vps-readiness-check.result }}" = "failure" ] && echo " - vps-readiness-check"
9411061
[ "${{ needs.deploy.result }}" = "failure" ] && echo " - deploy"
9421062
[ "${{ needs.api-health-gate.result }}" = "failure" ] && echo " - api-health-gate"
9431063
[ "${{ needs.sync-infra.result }}" = "failure" ] && echo " - sync-infra"

0 commit comments

Comments
(0)