Skip to content

feat: implement weekly base image digest rotation workflow for securi… #256

feat: implement weekly base image digest rotation workflow for securi…

feat: implement weekly base image digest rotation workflow for securi… #256

Workflow file for this run

# .github/workflows/deploy.yml
#
# Production Deployment Pipeline
#
# Design principles:
#   1. Triggers on every push to master (no paths filter — ensures sync-beta always runs)
#   2. Runs ALL validation from scratch — no trust built on PR results alone
#   3. Trivy scan runs BEFORE Docker push — vulnerable images never reach the registry
#   4. target: production + build-args mirror pr.yml exactly (bit-for-bit parity)
#   5. Image digest verified against PR simulation artifact when available
#   6. Blue-Green deploy with automatic rollback on health or smoke test failure
#   7. timeout-minutes on every job — hung processes never block CI indefinitely
#   8. npm ci retried up to 3x — registry flakiness never kills a valid deploy
#
# Parallel stages:
#   validate ──┐
#   test-api   ├─► build-scan-push ─► deploy ─► sync-infra ─► health-and-smoke
#   build-web ─┘                         │
#                 rollback ◄─────────────┘ (on failure of any downstream job)
name: Deploy to Production

on:
  push:
    branches:
      - master
  workflow_dispatch:

# Never cancel an in-progress deployment — let it finish or fail cleanly.
concurrency:
  group: production-deploy
  cancel-in-progress: false

# Default to read-only. Jobs that need additional access declare it explicitly.
permissions:
  contents: read
jobs:
  # ---------------------------------------------------------------------------
  # JOB: validate
  #
  # Fast pre-flight gate: TypeScript check plus dependency audit.
  # Runs concurrently with test-api and build-web to keep the pipeline short.
  # ---------------------------------------------------------------------------
  validate:
    name: Validate (typecheck + audit)
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - name: Confirm deployment trigger
        run: |
          echo "========================================="
          echo "Deployment triggered on master"
          echo " Commit SHA : ${{ github.sha }}"
          echo " Event : ${{ github.event_name }}"
          echo " Ref : ${{ github.ref }}"
          echo "========================================="
      - name: Checkout
        uses: actions/checkout@v5
      - name: Setup Node.js 24
        uses: actions/setup-node@v5
        with:
          node-version: '24'
          cache: npm
          cache-dependency-path: '**/package-lock.json'
      # Retry npm ci up to three times — registry flakiness must not fail the deploy.
      - name: Install workspace dependencies (with retry)
        run: |
          echo "::group::npm ci"
          for attempt in 1 2 3; do
            npm ci && break
            [ $attempt -eq 3 ] && { echo "::error::npm ci failed after 3 attempts"; exit 1; }
            echo "Attempt $attempt failed — retrying in 15s..."
            sleep 15
          done
          echo "::endgroup::"
      - name: Build shared types
        run: npm run build -w packages/types
      - name: Dependency vulnerability scan
        run: npm audit --omit=dev --audit-level=high
      - name: TypeScript check (API)
        working-directory: apps/api
        run: npx tsc --noEmit
# ---------------------------------------------------------------------------
# JOB: test-api
#
# Full backend test suite — unit tests then integration tests.
# Runs in parallel with validate and build-web.
# ---------------------------------------------------------------------------
test-api:
name: API Tests (unit + integration)
runs-on: ubuntu-latest
timeout-minutes: 15
env:
SUPABASE_URL: ${{ secrets.SUPABASE_URL_TEST }}
SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY_TEST }}
SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY_TEST }}
steps:
- name: Checkout
uses: actions/checkout@v5
- name: Setup Node.js 24
uses: actions/setup-node@v5
with:
node-version: '24'
cache: npm
cache-dependency-path: '**/package-lock.json'
- name: Install workspace dependencies (with retry)
run: |
echo "::group::npm ci"
for attempt in 1 2 3; do
npm ci && break
[ $attempt -eq 3 ] && { echo "::error::npm ci failed after 3 attempts"; exit 1; }
echo "Attempt $attempt failed — retrying in 15s..."
sleep 15
done
echo "::endgroup::"
- name: Build shared types
run: npm run build -w packages/types
- name: Unit tests
working-directory: apps/api
run: npx vitest run tests/unit/
- name: Integration tests
working-directory: apps/api
run: npx vitest run tests/integration/
# ---------------------------------------------------------------------------
# JOB: build-web
#
# Full frontend validation and production build.
# Runs in parallel with validate and test-api.
# ---------------------------------------------------------------------------
build-web:
name: Frontend Build (typecheck + lint + build)
runs-on: ubuntu-latest
timeout-minutes: 15
env:
NEXT_PUBLIC_API_BASE_URL: /api/proxy
NEXT_PUBLIC_SUPABASE_URL: https://ci-placeholder.supabase.co
NEXT_PUBLIC_SUPABASE_ANON_KEY: ci-build-placeholder-anon-key
NEXT_PUBLIC_MAPBOX_TOKEN: pk.ci-build-placeholder
steps:
- name: Verify NEXT_PUBLIC_API_BASE_URL is set
run: |
if [ -z "$NEXT_PUBLIC_API_BASE_URL" ]; then
echo "::error::NEXT_PUBLIC_API_BASE_URL is not set. Add it to the job env block."
exit 1
fi
echo "NEXT_PUBLIC_API_BASE_URL=${NEXT_PUBLIC_API_BASE_URL}"
- name: Checkout
uses: actions/checkout@v5
- name: Setup Node.js 24
uses: actions/setup-node@v5
with:
node-version: '24'
cache: npm
cache-dependency-path: '**/package-lock.json'
- name: Install workspace dependencies (with retry)
run: |
echo "::group::npm ci"
for attempt in 1 2 3; do
npm ci && break
[ $attempt -eq 3 ] && { echo "::error::npm ci failed after 3 attempts"; exit 1; }
echo "Attempt $attempt failed — retrying in 15s..."
sleep 15
done
echo "::endgroup::"
- name: Build shared types
run: npm run build -w packages/types
- name: TypeScript check (web)
run: npm run typecheck -w apps/web
- name: ESLint (web)
run: npm run lint -w apps/web
- name: Next.js production build
run: npm run build -w apps/web
# ---------------------------------------------------------------------------
# JOB: build-scan-push
#
# Three-phase security gate — identical build config to pr.yml:
# Phase 1 — Build locally (target: production, same build-args, same cache)
# Phase 2 — Trivy scan: pinned aquasec/trivy:0.49.1 Docker image, exit-code 1
# on HIGH/CRITICAL (blocks push). NOT trivy-action — supply-chain safe.
# DB pre-pulled, scan runs --network none (air-gapped).
# Phase 3 — Push exact scanned image to GHCR (no rebuild)
#
# Image digest verification:
# After building, the digest is compared against the digest stored by
# pr.yml's production-simulation job. A match confirms bit-for-bit parity
# between what was validated in PR and what is being deployed.
# Comparison is best-effort (continue-on-error) because the merge commit
# SHA may differ from the PR head SHA on squash-merges.
# ---------------------------------------------------------------------------
build-scan-push:
name: Build, Scan & Push Docker Image
runs-on: ubuntu-latest
needs: [validate, test-api, build-web]
timeout-minutes: 25
permissions:
contents: read
packages: write
outputs:
sha_short: ${{ steps.meta.outputs.sha_short }}
digest: ${{ steps.digest.outputs.digest }}
steps:
- name: Checkout
uses: actions/checkout@v5
- name: Extract commit SHA
id: meta
run: echo "sha_short=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Pull base images (force fresh manifest, prevent stale GHA cache)
run: |
docker pull node:24.2.0-bookworm-slim
docker pull gcr.io/distroless/nodejs24-debian12:nonroot
# Phase 1: Build into local Docker daemon for scanning.
# EXACT same parameters as pr.yml production-simulation:
# target: production, build-args: NODE_ENV=production, GHA cache.
# CACHE_BUSTER forces rebuild when package-lock.json changes (prevents stale deps).
# Cache scoped to production to prevent cross-branch contamination from PR builds.
- name: Build Docker image (pre-scan, no push)
uses: docker/build-push-action@v6
with:
context: .
file: ./apps/api/Dockerfile
target: production
build-args: |
NODE_ENV=production
CACHE_BUSTER=${{ hashFiles('**/package-lock.json') }}
push: false
load: true
pull: true
tags: |
fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
cache-from: type=gha,scope=production
cache-to: type=gha,mode=max,scope=production
# Verify Node.js runtime — exercises TLS stack, not just compile-time version constant.
# tls.createSecureContext() fails if libssl linkage is broken, proving runtime health.
- name: Verify Node.js runtime (TLS operational check)
run: |
IMAGE_NAME="fieldtrack-backend:${{ steps.meta.outputs.sha_short }}"
echo "Testing image: $IMAGE_NAME"
docker run --rm \
--entrypoint /nodejs/bin/node \
"$IMAGE_NAME" \
-e "
const crypto = require('crypto');
const tls = require('tls');
const ctx = tls.createSecureContext();
if (!ctx) { process.stderr.write('FAIL: TLS context failed\n'); process.exit(1); }
const h = crypto.createHash('sha256').update('smoke').digest('hex');
if (!h) { process.stderr.write('FAIL: hash failed\n'); process.exit(1); }
process.stdout.write('node=' + process.versions.node + ' openssl=' + process.versions.openssl + ' tls=ok\n');
"
# Capture the content-addressable image digest.
# With cache scoping and cache busting, digest should always reproduce correctly.
- name: Capture image digest
id: digest
run: |
IMAGE_NAME="fieldtrack-backend:${{ steps.meta.outputs.sha_short }}"
DIGEST=$(docker inspect "$IMAGE_NAME" --format='{{.Id}}')
echo "digest=$DIGEST" >> "$GITHUB_OUTPUT"
echo "=== Build traceability ==="
echo " Commit SHA : ${{ github.sha }}"
echo " Image tag : $IMAGE_NAME"
echo " Image digest : $DIGEST"
# Compare this digest with the one stored by pr.yml's production-simulation.
# A match = bit-for-bit parity. A mismatch = code/cache divergence (warning).
# continue-on-error: true — squash merges produce a new commit SHA, which
# may cause minor divergence even with identical source code.
- name: Verify image digest parity with PR simulation
continue-on-error: true
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Find the PR number associated with this merge commit
PR_NUMBER=$(gh api \
"/repos/${{ github.repository }}/commits/${{ github.sha }}/pulls" \
--header "X-GitHub-Api-Version: 2022-11-28" \
--jq '.[0].number // empty' 2>/dev/null || echo "")
if [ -z "$PR_NUMBER" ]; then
echo "No associated PR found for commit ${{ github.sha }} — skipping digest comparison."
exit 0
fi
echo "Associated PR: #${PR_NUMBER}"
# Find the most recent successful pr.yml run for this PR
RUN_ID=$(gh run list \
--repo "${{ github.repository }}" \
--workflow "pr.yml" \
--json databaseId,conclusion,headSha \
--jq "map(select(.conclusion == \"success\")) | .[0].databaseId // empty" \
2>/dev/null || echo "")
if [ -z "$RUN_ID" ]; then
echo "No successful PR validation run found — skipping digest comparison."
exit 0
fi
# Download the image-digest artifact from that run
gh run download "$RUN_ID" \
--repo "${{ github.repository }}" \
--name "image-digest-pr-${PR_NUMBER}" \
--dir /tmp/pr-digest \
2>/dev/null || true
if [ ! -f /tmp/pr-digest/image-digest.txt ]; then
echo "PR image-digest artifact not found — skipping comparison."
exit 0
fi
PR_DIGEST=$(cat /tmp/pr-digest/image-digest.txt)
DEPLOY_DIGEST="${{ steps.digest.outputs.digest }}"
echo "PR simulation digest: $PR_DIGEST"
echo "Deploy image digest: $DEPLOY_DIGEST"
if [ "$PR_DIGEST" = "$DEPLOY_DIGEST" ]; then
echo "✓ Digest match — bit-for-bit parity confirmed between PR and deploy."
else
echo "⚠ Digest mismatch — builds diverged between PR and deploy."
echo " Expected on squash-merges where the commit SHA changes."
echo " Ensure no source changes occurred between PR approval and deploy trigger."
fi
# Phase 2: Trivy scan — image pinned by immutable digest, NOT trivy-action.
# aquasec/trivy:0.49.1 → sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc
# Identical severity gates to pr.yml (HIGH,CRITICAL / exit-code 1).
# Two-phase: DB downloaded first (needs network), then scan runs --network none.
- name: Get date for Trivy DB cache key
id: trivy-date
run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
- name: Cache Trivy DB (daily refresh)
uses: actions/cache@v4
with:
path: /tmp/trivy-cache
key: trivy-db-${{ runner.os }}-${{ steps.trivy-date.outputs.date }}
restore-keys: |
trivy-db-${{ runner.os }}-
- name: Pull Trivy vulnerability database
run: |
docker run --rm \
-v /tmp/trivy-cache:/root/.cache \
aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc \
image --download-db-only
- name: Scan image with Trivy (HIGH/CRITICAL, ignore-unfixed)
env:
IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
run: |
SCAN_PASSED=false
for i in 1 2 3; do
if docker run --rm \
--network none \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /tmp/trivy-cache:/root/.cache \
-v "$(pwd)/.trivyignore:/tmp/.trivyignore:ro" \
aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \
--skip-db-update \
--ignore-unfixed \
--severity HIGH,CRITICAL \
--exit-code 1 \
--ignorefile /tmp/.trivyignore \
"$IMAGE_NAME"; then
SCAN_PASSED=true
break
fi
echo "Trivy attempt $i failed..."
[ "$i" -lt 3 ] && sleep 5
done
if [ "$SCAN_PASSED" != "true" ]; then
echo "::error::Trivy scan failed after 3 attempts — HIGH/CRITICAL vulnerabilities found or scan error."
exit 1
fi
echo "✓ Trivy scan passed (HIGH/CRITICAL, ignore-unfixed)"
- name: Scan for unfixed CRITICAL vulnerabilities (informational)
continue-on-error: true
env:
IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
run: |
UNFIXED_COUNT=$(docker run --rm \
--network none \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /tmp/trivy-cache:/root/.cache \
aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \
--skip-db-update \
--severity CRITICAL \
--format json \
"$IMAGE_NAME" | jq '[.Results[]?.Misconfigurations[]? // .Results[]?.Vulnerabilities[]? | select(.FixedVersion == null or .FixedVersion == "")] | length')
if [ "$UNFIXED_COUNT" -gt 0 ]; then
echo "⚠ WARNING: $UNFIXED_COUNT unfixed CRITICAL vulnerabilities found"
echo " (No patches available upstream — waiting for vendor fix)"
docker run --rm \
--network none \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /tmp/trivy-cache:/root/.cache \
aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \
--skip-db-update \
--severity CRITICAL \
"$IMAGE_NAME" >> /tmp/unfixed-critical.log || true
else
echo "✓ No unfixed CRITICAL vulnerabilities"
fi
# Phase 3: Scan passed — push the exact scanned image (same layer digests).
# Uses docker tag + push rather than rebuilding to guarantee what was scanned
# is exactly what lands in the registry.
- name: Verify image digest unchanged before push
env:
IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
IMAGE_DIGEST: ${{ steps.digest.outputs.digest }}
run: |
# docker inspect .Id returns the config digest (sha256:...) which is
# stable across tag operations — same value captured in the digest step.
CURRENT=$(docker inspect "$IMAGE_NAME" --format='{{.Id}}')
echo "Expected digest : $IMAGE_DIGEST"
echo "Current digest : $CURRENT"
if [ "$CURRENT" != "$IMAGE_DIGEST" ]; then
echo "ERROR: image digest changed between scan and push — aborting."
exit 1
fi
echo "✓ Digest verified — pushing exactly what was scanned."
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Push verified image to registry
run: |
OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
docker tag \
fieldtrack-backend:${{ steps.meta.outputs.sha_short }} \
ghcr.io/${OWNER}/fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
docker push ghcr.io/${OWNER}/fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
echo "✓ Pushed ghcr.io/${OWNER}/fieldtrack-backend:${{ steps.meta.outputs.sha_short }}"
# Use the same pinned Trivy image to generate the SBOM — no additional
# tool dependency, no unpinned action, same supply-chain guarantees.
- name: Generate SBOM (CycloneDX)
env:
IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
run: |
docker run --rm \
-v /var/run/docker.sock:/var/run/docker.sock \
aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \
--format cyclonedx \
--output /dev/stdout \
"$IMAGE_NAME" > sbom.json
- name: Upload SBOM artifact
uses: actions/upload-artifact@v4
with:
name: sbom-${{ steps.meta.outputs.sha_short }}
path: sbom.json
retention-days: 90
- name: Save build provenance
env:
IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }}
IMAGE_DIGEST: ${{ steps.digest.outputs.digest }}
run: |
echo "commit=${{ github.sha }}" > provenance.txt
echo "ref=${{ github.ref }}" >> provenance.txt
echo "image=${IMAGE_NAME}" >> provenance.txt
echo "digest=${IMAGE_DIGEST}" >> provenance.txt
echo "workflow=${{ github.workflow }}" >> provenance.txt
echo "run_id=${{ github.run_id }}" >> provenance.txt
- name: Upload provenance artifact
uses: actions/upload-artifact@v4
with:
name: provenance-${{ steps.meta.outputs.sha_short }}
path: provenance.txt
retention-days: 90
- name: Build & scan summary
if: always()
env:
IMAGE_DIGEST: ${{ steps.digest.outputs.digest }}
run: |
SBOM_COUNT=$(python3 -c "import json; d=json.load(open('sbom.json')); print(len(d.get('components', [])))" 2>/dev/null || echo 'n/a')
{
echo "## Build · Scan · Push"
echo "| Field | Value |"
echo "|---|---|"
echo "| Commit SHA | \`${{ github.sha }}\` |"
echo "| Image tag | \`fieldtrack-backend:${{ steps.meta.outputs.sha_short }}\` |"
echo "| Image digest | \`${IMAGE_DIGEST}\` |"
echo "| SBOM components | ${SBOM_COUNT} |"
echo "| Trivy gate | HIGH,CRITICAL / exit-code 1 / ignore-unfixed |"
echo "| Registry | ghcr.io/${{ github.repository_owner }}/fieldtrack-backend |"
} >> "$GITHUB_STEP_SUMMARY"
# ---------------------------------------------------------------------------
# JOB: deploy
#
# Blue-Green deployment to VPS via SSH.
# The deploy-bluegreen.sh script manages slot switching and container health.
# ---------------------------------------------------------------------------
deploy:
name: Deploy (Blue-Green SSH)
runs-on: ubuntu-latest
needs: [build-scan-push]
timeout-minutes: 15
steps:
- name: Validate required deployment secrets
env:
API_BASE_URL: ${{ secrets.API_BASE_URL }}
CORS_ORIGIN: ${{ secrets.CORS_ORIGIN }}
run: |
if [ -z "${API_BASE_URL:-}" ]; then
echo "::error::API_BASE_URL secret is not set. Deployment aborted."
exit 1
fi
echo "✓ API_BASE_URL is set"
if [ -z "${CORS_ORIGIN:-}" ]; then
echo "::error::CORS_ORIGIN secret is not set. Deployment aborted."
exit 1
fi
echo "✓ CORS_ORIGIN is set"
- name: Validate environment contract before deploy
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.DO_HOST }}
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
set -euo pipefail
export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0"
cd "$DEPLOY_ROOT"
git fetch origin
git reset --hard origin/master
chmod +x apps/api/scripts/*.sh
echo "=== Pre-deploy environment validation ==="
./apps/api/scripts/validate-env.sh --check-monitoring
echo "✓ Environment contract validated"
- name: Blue-Green deploy via SSH
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.DO_HOST }}
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
set -euo pipefail
T0=$(date +%s)
export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0"
cd "$DEPLOY_ROOT"
chmod +x apps/api/scripts/*.sh
# Environment already validated in previous step
./apps/api/scripts/deploy-bluegreen.sh "${{ needs.build-scan-push.outputs.sha_short }}"
echo "✓ Deploy completed in $(($(date +%s) - T0))s"
- name: Log deployment state (slot + SHA for debugging)
uses: appleboy/ssh-action@v1.0.3
if: always()
with:
host: ${{ secrets.DO_HOST }}
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
ACTIVE_SLOT=$(cat /var/run/fieldtrack/active-slot 2>/dev/null || echo "unknown")
DEPLOY_STATUS="UNKNOWN"
# Check if health endpoint is responding (good sign of successful deploy)
if timeout 5 curl -sf http://127.0.0.1:3000/health >/dev/null 2>&1; then
DEPLOY_STATUS="SUCCESS"
fi
echo "DEPLOY_STATE=$DEPLOY_STATUS| SLOT=$ACTIVE_SLOT | SHA=${{ github.sha }}"
# ---------------------------------------------------------------------------
# JOB: sync-infra
#
# Syncs Nginx config (with slot-aware port substitution).
# Monitoring restarts are handled exclusively by deploy-bluegreen.sh.
# ---------------------------------------------------------------------------
sync-infra:
name: Sync Infrastructure (nginx)
runs-on: ubuntu-latest
needs: [deploy]
timeout-minutes: 10
steps:
- name: Sync infrastructure configs via SSH
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.DO_HOST }}
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
set -euo pipefail
T0=$(date +%s)
export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0"
INFRA_DIR="$DEPLOY_ROOT/infra"
NGINX_LIVE="/etc/nginx/sites-enabled/fieldtrack.conf"
ACTIVE_SLOT_FILE="/var/run/fieldtrack/active-slot"
ACTIVE_SLOT=$(cat "$ACTIVE_SLOT_FILE" 2>/dev/null || echo "blue")
if [ "$ACTIVE_SLOT" = "green" ]; then BACKEND_PORT=3002; else BACKEND_PORT=3001; fi
# Load env from apps/api/.env — exports DEPLOY_ROOT, API_HOSTNAME, and all
# app variables. DEPLOY_ROOT is already exported above; load-env.sh uses it.
source "$DEPLOY_ROOT/apps/api/scripts/load-env.sh"
echo "✓ API_HOSTNAME: $API_HOSTNAME"
echo "=== Syncing Nginx (slot: $ACTIVE_SLOT, port: $BACKEND_PORT) ==="
sudo cp "$NGINX_LIVE" /tmp/fieldtrack.conf.bak 2>/dev/null || true
NGINX_TMP=$(mktemp /tmp/fieldtrack-nginx.XXXXXX.conf)
sed \
-e "s|__BACKEND_PORT__|$BACKEND_PORT|g" \
-e "s|__API_HOSTNAME__|$API_HOSTNAME|g" \
"$INFRA_DIR/nginx/fieldtrack.conf" > "$NGINX_TMP"
sudo cp "$NGINX_TMP" "$NGINX_LIVE"
rm -f "$NGINX_TMP"
if ! sudo nginx -t 2>&1; then
echo "Nginx test failed — restoring backup..."
sudo cp /tmp/fieldtrack.conf.bak "$NGINX_LIVE"
exit 1
fi
sudo systemctl reload nginx
echo "✓ Nginx reloaded."
echo "✓ Infra sync completed in $(($(date +%s) - T0))s"
# ---------------------------------------------------------------------------
# JOB: health-and-smoke
#
# Step 1: Poll /health and /ready until they return 200 (up to 60 s each).
# Step 2: Run the full smoke test suite (login + core API flows).
# Failure here triggers the rollback job automatically.
# ---------------------------------------------------------------------------
health-and-smoke:
name: Health Checks & Smoke Tests
runs-on: ubuntu-latest
needs: [sync-infra]
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v5
- name: Wait for /health endpoint (via VPS)
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.DO_HOST }}
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
set -euo pipefail
export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0"
cd "$DEPLOY_ROOT"
source apps/api/scripts/load-env.sh
echo "=== Checking /health via VPS (API_HOSTNAME=$API_HOSTNAME) ==="
for i in $(seq 1 30); do
echo "---- Attempt $i ----"
STATUS=$(curl -sS \
--resolve "${API_HOSTNAME}:443:127.0.0.1" \
-o /tmp/resp.txt \
-w "%{http_code}" \
https://${API_HOSTNAME}/health \
--insecure || echo "000")
BODY=$(cat /tmp/resp.txt 2>/dev/null || echo "")
echo "HTTP: $STATUS"
echo "BODY: $BODY"
if [ "$STATUS" = "200" ] && echo "$BODY" | grep -q '"status":"ok"'; then
echo "✓ /health OK (attempt $i)"
exit 0
fi
sleep 2
done
echo "❌ /health failed"
exit 1
- name: Wait for /health endpoint (final public check)
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.DO_HOST }}
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
set -euo pipefail
export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0"
cd "$DEPLOY_ROOT"
source apps/api/scripts/load-env.sh
echo "=== Final health check via public endpoint (API_HOSTNAME=$API_HOSTNAME) ==="
for i in $(seq 1 10); do
echo "---- Attempt $i ----"
STATUS=$(curl -sS \
--resolve "${API_HOSTNAME}:443:127.0.0.1" \
-o /tmp/resp.txt \
-w "%{http_code}" \
https://${API_HOSTNAME}/health \
--insecure || echo "000")
BODY=$(cat /tmp/resp.txt 2>/dev/null || echo "")
echo "HTTP: $STATUS"
echo "BODY: $BODY"
if [ "$STATUS" = "200" ] && echo "$BODY" | grep -q '"status":"ok"'; then
echo "✓ /health OK (attempt $i)"
exit 0
fi
sleep 2
done
echo "❌ /health failed"
exit 1
- name: Run smoke tests
env:
API_BASE_URL: ${{ secrets.API_BASE_URL }}
FT_EMP_EMAIL: ${{ secrets.FT_EMP_EMAIL }}
FT_EMP_PASSWORD: ${{ secrets.FT_EMP_PASSWORD }}
FT_ADMIN_EMAIL: ${{ secrets.FT_ADMIN_EMAIL }}
FT_ADMIN_PASSWORD: ${{ secrets.FT_ADMIN_PASSWORD }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }}
run: |
chmod +x apps/api/scripts/smoke-test.sh
./apps/api/scripts/smoke-test.sh
- name: Upload smoke test report
if: always()
uses: actions/upload-artifact@v4
with:
name: smoke-test-report-${{ github.sha }}
path: smoke-report.json
retention-days: 30
- name: Deployment summary
run: |
echo "====================================================="
echo " Production Deployment: COMPLETE ✅"
echo "====================================================="
echo " Commit: ${{ github.sha }}"
echo " /health: OK"
echo " /ready: OK"
echo " Smoke: passed"
echo "====================================================="
# ---------------------------------------------------------------------------
# JOB: rollback
#
# Triggered automatically when deploy, sync-infra, OR health-and-smoke fails.
# Restores the previously healthy Blue-Green slot via the rollback script.
# 'if: always()' ensures this job can evaluate even if upstream jobs failed.
# ---------------------------------------------------------------------------
rollback:
name: Rollback Deployment (auto)
runs-on: ubuntu-latest
needs: [deploy, sync-infra, health-and-smoke]
timeout-minutes: 10
if: |
always() &&
(
needs.deploy.result == 'failure' ||
needs.sync-infra.result == 'failure' ||
needs.health-and-smoke.result == 'failure'
)
steps:
- name: Log rollback trigger
run: |
echo "ROLLBACK_TRIGGERED=TRUE | FAILED_JOBS:"
[ "${{ needs.deploy.result }}" = "failure" ] && echo " - deploy"
[ "${{ needs.sync-infra.result }}" = "failure" ] && echo " - sync-infra"
[ "${{ needs.health-and-smoke.result }}" = "failure" ] && echo " - health-and-smoke"
echo "SHA=${{ github.sha }}"
- name: Rollback on VPS
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.DO_HOST }}
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
set -euo pipefail
export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0"
cd "$DEPLOY_ROOT"
chmod +x apps/api/scripts/*.sh
./apps/api/scripts/rollback.sh --auto
# Log final state
ACTIVE_SLOT=$(cat /var/run/fieldtrack/active-slot 2>/dev/null || echo "unknown")
echo "ROLLBACK_COMPLETE | ACTIVE_SLOT=$ACTIVE_SLOT | SHA=${{ github.sha }}"