# PR #256 — "feat: implement weekly base image digest rotation workflow for security"
# NOTE(review): the three banner lines here ("This file contains hidden or
# bidirectional Unicode text…", "Learn more about bidirectional Unicode
# characters") are GitHub diff-viewer UI chrome, not file content. The PR title
# mentions "weekly base image digest rotation", but the file below is the
# production deploy pipeline — confirm the title against the actual change set.
| # .github/workflows/deploy.yml | |
| # | |
| # Production Deployment Pipeline | |
| # | |
| # Design principles: | |
| # 1. Triggers on every push to master (no paths filter — ensures sync-beta always runs) | |
| # 2. Runs ALL validation from scratch — no trust built on PR results alone | |
| # 3. Trivy scan runs BEFORE Docker push — vulnerable images never reach the registry | |
| # 4. target: production + build-args mirror pr.yml exactly (bit-for-bit parity) | |
| # 5. Image digest verified against PR simulation artifact when available | |
| # 6. Blue-Green deploy with automatic rollback on health or smoke test failure | |
| # 7. timeout-minutes on every job — hung processes never block CI indefinitely | |
| # 8. npm ci retried up to 3x — registry flakiness never kills a valid deploy | |
| # | |
| # Parallel stages: | |
| # validate ─┐ | |
| # test-api ├─► build-scan-push ─► deploy ─► sync-infra ─► health-and-smoke | |
| # build-web ┘ │ | |
| # rollback ◄────────────┘ (on failure) | |
| name: Deploy to Production | |
| on: | |
| push: | |
| branches: | |
| - master | |
| workflow_dispatch: | |
| # Never cancel an in-progress deployment — let it finish or fail cleanly. | |
| concurrency: | |
| group: production-deploy | |
| cancel-in-progress: false | |
| # Default to read-only. Jobs that need additional access declare it explicitly. | |
| permissions: | |
| contents: read | |
| jobs: | |
| # --------------------------------------------------------------------------- | |
| # JOB: validate | |
| # | |
| # Fast pre-flight: TypeScript check + dependency audit. | |
| # Runs in parallel with test-api and build-web to maximise pipeline speed. | |
| # --------------------------------------------------------------------------- | |
| validate: | |
| name: Validate (typecheck + audit) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Confirm deployment trigger | |
| run: | | |
| echo "=========================================" | |
| echo "Deployment triggered on master" | |
| echo " Commit SHA : ${{ github.sha }}" | |
| echo " Event : ${{ github.event_name }}" | |
| echo " Ref : ${{ github.ref }}" | |
| echo "=========================================" | |
| - name: Checkout | |
| uses: actions/checkout@v5 | |
| - name: Setup Node.js 24 | |
| uses: actions/setup-node@v5 | |
| with: | |
| node-version: '24' | |
| cache: npm | |
| cache-dependency-path: '**/package-lock.json' | |
| - name: Install workspace dependencies (with retry) | |
| run: | | |
| echo "::group::npm ci" | |
| for attempt in 1 2 3; do | |
| npm ci && break | |
| [ $attempt -eq 3 ] && { echo "::error::npm ci failed after 3 attempts"; exit 1; } | |
| echo "Attempt $attempt failed — retrying in 15s..." | |
| sleep 15 | |
| done | |
| echo "::endgroup::" | |
| - name: Build shared types | |
| run: npm run build -w packages/types | |
| - name: Dependency vulnerability scan | |
| run: npm audit --omit=dev --audit-level=high | |
| - name: TypeScript check (API) | |
| working-directory: apps/api | |
| run: npx tsc --noEmit | |
| # --------------------------------------------------------------------------- | |
| # JOB: test-api | |
| # | |
| # Full backend test suite — unit tests then integration tests. | |
| # Runs in parallel with validate and build-web. | |
| # --------------------------------------------------------------------------- | |
| test-api: | |
| name: API Tests (unit + integration) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| env: | |
| SUPABASE_URL: ${{ secrets.SUPABASE_URL_TEST }} | |
| SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY_TEST }} | |
| SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY_TEST }} | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v5 | |
| - name: Setup Node.js 24 | |
| uses: actions/setup-node@v5 | |
| with: | |
| node-version: '24' | |
| cache: npm | |
| cache-dependency-path: '**/package-lock.json' | |
| - name: Install workspace dependencies (with retry) | |
| run: | | |
| echo "::group::npm ci" | |
| for attempt in 1 2 3; do | |
| npm ci && break | |
| [ $attempt -eq 3 ] && { echo "::error::npm ci failed after 3 attempts"; exit 1; } | |
| echo "Attempt $attempt failed — retrying in 15s..." | |
| sleep 15 | |
| done | |
| echo "::endgroup::" | |
| - name: Build shared types | |
| run: npm run build -w packages/types | |
| - name: Unit tests | |
| working-directory: apps/api | |
| run: npx vitest run tests/unit/ | |
| - name: Integration tests | |
| working-directory: apps/api | |
| run: npx vitest run tests/integration/ | |
| # --------------------------------------------------------------------------- | |
| # JOB: build-web | |
| # | |
| # Full frontend validation and production build. | |
| # Runs in parallel with validate and test-api. | |
| # --------------------------------------------------------------------------- | |
| build-web: | |
| name: Frontend Build (typecheck + lint + build) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| env: | |
| NEXT_PUBLIC_API_BASE_URL: /api/proxy | |
| NEXT_PUBLIC_SUPABASE_URL: https://ci-placeholder.supabase.co | |
| NEXT_PUBLIC_SUPABASE_ANON_KEY: ci-build-placeholder-anon-key | |
| NEXT_PUBLIC_MAPBOX_TOKEN: pk.ci-build-placeholder | |
| steps: | |
| - name: Verify NEXT_PUBLIC_API_BASE_URL is set | |
| run: | | |
| if [ -z "$NEXT_PUBLIC_API_BASE_URL" ]; then | |
| echo "::error::NEXT_PUBLIC_API_BASE_URL is not set. Add it to the job env block." | |
| exit 1 | |
| fi | |
| echo "NEXT_PUBLIC_API_BASE_URL=${NEXT_PUBLIC_API_BASE_URL}" | |
| - name: Checkout | |
| uses: actions/checkout@v5 | |
| - name: Setup Node.js 24 | |
| uses: actions/setup-node@v5 | |
| with: | |
| node-version: '24' | |
| cache: npm | |
| cache-dependency-path: '**/package-lock.json' | |
| - name: Install workspace dependencies (with retry) | |
| run: | | |
| echo "::group::npm ci" | |
| for attempt in 1 2 3; do | |
| npm ci && break | |
| [ $attempt -eq 3 ] && { echo "::error::npm ci failed after 3 attempts"; exit 1; } | |
| echo "Attempt $attempt failed — retrying in 15s..." | |
| sleep 15 | |
| done | |
| echo "::endgroup::" | |
| - name: Build shared types | |
| run: npm run build -w packages/types | |
| - name: TypeScript check (web) | |
| run: npm run typecheck -w apps/web | |
| - name: ESLint (web) | |
| run: npm run lint -w apps/web | |
| - name: Next.js production build | |
| run: npm run build -w apps/web | |
| # --------------------------------------------------------------------------- | |
| # JOB: build-scan-push | |
| # | |
| # Three-phase security gate — identical build config to pr.yml: | |
| # Phase 1 — Build locally (target: production, same build-args, same cache) | |
| # Phase 2 — Trivy scan: pinned aquasec/trivy:0.49.1 Docker image, exit-code 1 | |
| # on HIGH/CRITICAL (blocks push). NOT trivy-action — supply-chain safe. | |
| # DB pre-pulled, scan runs --network none (air-gapped). | |
| # Phase 3 — Push exact scanned image to GHCR (no rebuild) | |
| # | |
| # Image digest verification: | |
| # After building, the digest is compared against the digest stored by | |
| # pr.yml's production-simulation job. A match confirms bit-for-bit parity | |
| # between what was validated in PR and what is being deployed. | |
| # Comparison is best-effort (continue-on-error) because the merge commit | |
| # SHA may differ from the PR head SHA on squash-merges. | |
| # --------------------------------------------------------------------------- | |
| build-scan-push: | |
| name: Build, Scan & Push Docker Image | |
| runs-on: ubuntu-latest | |
| needs: [validate, test-api, build-web] | |
| timeout-minutes: 25 | |
| permissions: | |
| contents: read | |
| packages: write | |
| outputs: | |
| sha_short: ${{ steps.meta.outputs.sha_short }} | |
| digest: ${{ steps.digest.outputs.digest }} | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v5 | |
| - name: Extract commit SHA | |
| id: meta | |
| run: echo "sha_short=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| - name: Pull base images (force fresh manifest, prevent stale GHA cache) | |
| run: | | |
| docker pull node:24.2.0-bookworm-slim | |
| docker pull gcr.io/distroless/nodejs24-debian12:nonroot | |
| # Phase 1: Build into local Docker daemon for scanning. | |
| # EXACT same parameters as pr.yml production-simulation: | |
| # target: production, build-args: NODE_ENV=production, GHA cache. | |
| # CACHE_BUSTER forces rebuild when package-lock.json changes (prevents stale deps). | |
| # Cache scoped to production to prevent cross-branch contamination from PR builds. | |
| - name: Build Docker image (pre-scan, no push) | |
| uses: docker/build-push-action@v6 | |
| with: | |
| context: . | |
| file: ./apps/api/Dockerfile | |
| target: production | |
| build-args: | | |
| NODE_ENV=production | |
| CACHE_BUSTER=${{ hashFiles('**/package-lock.json') }} | |
| push: false | |
| load: true | |
| pull: true | |
| tags: | | |
| fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| cache-from: type=gha,scope=production | |
| cache-to: type=gha,mode=max,scope=production | |
| # Verify Node.js runtime — exercises TLS stack, not just compile-time version constant. | |
| # tls.createSecureContext() fails if libssl linkage is broken, proving runtime health. | |
| - name: Verify Node.js runtime (TLS operational check) | |
| run: | | |
| IMAGE_NAME="fieldtrack-backend:${{ steps.meta.outputs.sha_short }}" | |
| echo "Testing image: $IMAGE_NAME" | |
| docker run --rm \ | |
| --entrypoint /nodejs/bin/node \ | |
| "$IMAGE_NAME" \ | |
| -e " | |
| const crypto = require('crypto'); | |
| const tls = require('tls'); | |
| const ctx = tls.createSecureContext(); | |
| if (!ctx) { process.stderr.write('FAIL: TLS context failed\n'); process.exit(1); } | |
| const h = crypto.createHash('sha256').update('smoke').digest('hex'); | |
| if (!h) { process.stderr.write('FAIL: hash failed\n'); process.exit(1); } | |
| process.stdout.write('node=' + process.versions.node + ' openssl=' + process.versions.openssl + ' tls=ok\n'); | |
| " | |
| # Capture the content-addressable image digest. | |
| # With cache scoping and cache busting, digest should always reproduce correctly. | |
| - name: Capture image digest | |
| id: digest | |
| run: | | |
| IMAGE_NAME="fieldtrack-backend:${{ steps.meta.outputs.sha_short }}" | |
| DIGEST=$(docker inspect "$IMAGE_NAME" --format='{{.Id}}') | |
| echo "digest=$DIGEST" >> "$GITHUB_OUTPUT" | |
| echo "=== Build traceability ===" | |
| echo " Commit SHA : ${{ github.sha }}" | |
| echo " Image tag : $IMAGE_NAME" | |
| echo " Image digest : $DIGEST" | |
| # Compare this digest with the one stored by pr.yml's production-simulation. | |
| # A match = bit-for-bit parity. A mismatch = code/cache divergence (warning). | |
| # continue-on-error: true — squash merges produce a new commit SHA, which | |
| # may cause minor divergence even with identical source code. | |
| - name: Verify image digest parity with PR simulation | |
| continue-on-error: true | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| run: | | |
| # Find the PR number associated with this merge commit | |
| PR_NUMBER=$(gh api \ | |
| "/repos/${{ github.repository }}/commits/${{ github.sha }}/pulls" \ | |
| --header "X-GitHub-Api-Version: 2022-11-28" \ | |
| --jq '.[0].number // empty' 2>/dev/null || echo "") | |
| if [ -z "$PR_NUMBER" ]; then | |
| echo "No associated PR found for commit ${{ github.sha }} — skipping digest comparison." | |
| exit 0 | |
| fi | |
| echo "Associated PR: #${PR_NUMBER}" | |
| # Find the most recent successful pr.yml run for this PR | |
| RUN_ID=$(gh run list \ | |
| --repo "${{ github.repository }}" \ | |
| --workflow "pr.yml" \ | |
| --json databaseId,conclusion,headSha \ | |
| --jq "map(select(.conclusion == \"success\")) | .[0].databaseId // empty" \ | |
| 2>/dev/null || echo "") | |
| if [ -z "$RUN_ID" ]; then | |
| echo "No successful PR validation run found — skipping digest comparison." | |
| exit 0 | |
| fi | |
| # Download the image-digest artifact from that run | |
| gh run download "$RUN_ID" \ | |
| --repo "${{ github.repository }}" \ | |
| --name "image-digest-pr-${PR_NUMBER}" \ | |
| --dir /tmp/pr-digest \ | |
| 2>/dev/null || true | |
| if [ ! -f /tmp/pr-digest/image-digest.txt ]; then | |
| echo "PR image-digest artifact not found — skipping comparison." | |
| exit 0 | |
| fi | |
| PR_DIGEST=$(cat /tmp/pr-digest/image-digest.txt) | |
| DEPLOY_DIGEST="${{ steps.digest.outputs.digest }}" | |
| echo "PR simulation digest: $PR_DIGEST" | |
| echo "Deploy image digest: $DEPLOY_DIGEST" | |
| if [ "$PR_DIGEST" = "$DEPLOY_DIGEST" ]; then | |
| echo "✓ Digest match — bit-for-bit parity confirmed between PR and deploy." | |
| else | |
| echo "⚠ Digest mismatch — builds diverged between PR and deploy." | |
| echo " Expected on squash-merges where the commit SHA changes." | |
| echo " Ensure no source changes occurred between PR approval and deploy trigger." | |
| fi | |
| # Phase 2: Trivy scan — image pinned by immutable digest, NOT trivy-action. | |
| # aquasec/trivy:0.49.1 → sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc | |
| # Identical severity gates to pr.yml (HIGH,CRITICAL / exit-code 1). | |
| # Two-phase: DB downloaded first (needs network), then scan runs --network none. | |
| - name: Get date for Trivy DB cache key | |
| id: trivy-date | |
| run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT" | |
| - name: Cache Trivy DB (daily refresh) | |
| uses: actions/cache@v4 | |
| with: | |
| path: /tmp/trivy-cache | |
| key: trivy-db-${{ runner.os }}-${{ steps.trivy-date.outputs.date }} | |
| restore-keys: | | |
| trivy-db-${{ runner.os }}- | |
| - name: Pull Trivy vulnerability database | |
| run: | | |
| docker run --rm \ | |
| -v /tmp/trivy-cache:/root/.cache \ | |
| aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc \ | |
| image --download-db-only | |
| - name: Scan image with Trivy (HIGH/CRITICAL, ignore-unfixed) | |
| env: | |
| IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| run: | | |
| SCAN_PASSED=false | |
| for i in 1 2 3; do | |
| if docker run --rm \ | |
| --network none \ | |
| -v /var/run/docker.sock:/var/run/docker.sock \ | |
| -v /tmp/trivy-cache:/root/.cache \ | |
| -v "$(pwd)/.trivyignore:/tmp/.trivyignore:ro" \ | |
| aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \ | |
| --skip-db-update \ | |
| --ignore-unfixed \ | |
| --severity HIGH,CRITICAL \ | |
| --exit-code 1 \ | |
| --ignorefile /tmp/.trivyignore \ | |
| "$IMAGE_NAME"; then | |
| SCAN_PASSED=true | |
| break | |
| fi | |
| echo "Trivy attempt $i failed..." | |
| [ "$i" -lt 3 ] && sleep 5 | |
| done | |
| if [ "$SCAN_PASSED" != "true" ]; then | |
| echo "::error::Trivy scan failed after 3 attempts — HIGH/CRITICAL vulnerabilities found or scan error." | |
| exit 1 | |
| fi | |
| echo "✓ Trivy scan passed (HIGH/CRITICAL, ignore-unfixed)" | |
| - name: Scan for unfixed CRITICAL vulnerabilities (informational) | |
| continue-on-error: true | |
| env: | |
| IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| run: | | |
| UNFIXED_COUNT=$(docker run --rm \ | |
| --network none \ | |
| -v /var/run/docker.sock:/var/run/docker.sock \ | |
| -v /tmp/trivy-cache:/root/.cache \ | |
| aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \ | |
| --skip-db-update \ | |
| --severity CRITICAL \ | |
| --format json \ | |
| "$IMAGE_NAME" | jq '[.Results[]?.Misconfigurations[]? // .Results[]?.Vulnerabilities[]? | select(.FixedVersion == null or .FixedVersion == "")] | length') | |
| if [ "$UNFIXED_COUNT" -gt 0 ]; then | |
| echo "⚠ WARNING: $UNFIXED_COUNT unfixed CRITICAL vulnerabilities found" | |
| echo " (No patches available upstream — waiting for vendor fix)" | |
| docker run --rm \ | |
| --network none \ | |
| -v /var/run/docker.sock:/var/run/docker.sock \ | |
| -v /tmp/trivy-cache:/root/.cache \ | |
| aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \ | |
| --skip-db-update \ | |
| --severity CRITICAL \ | |
| "$IMAGE_NAME" >> /tmp/unfixed-critical.log || true | |
| else | |
| echo "✓ No unfixed CRITICAL vulnerabilities" | |
| fi | |
| # Phase 3: Scan passed — push the exact scanned image (same layer digests). | |
| # Uses docker tag + push rather than rebuilding to guarantee what was scanned | |
| # is exactly what lands in the registry. | |
| - name: Verify image digest unchanged before push | |
| env: | |
| IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| IMAGE_DIGEST: ${{ steps.digest.outputs.digest }} | |
| run: | | |
| # docker inspect .Id returns the config digest (sha256:...) which is | |
| # stable across tag operations — same value captured in the digest step. | |
| CURRENT=$(docker inspect "$IMAGE_NAME" --format='{{.Id}}') | |
| echo "Expected digest : $IMAGE_DIGEST" | |
| echo "Current digest : $CURRENT" | |
| if [ "$CURRENT" != "$IMAGE_DIGEST" ]; then | |
| echo "ERROR: image digest changed between scan and push — aborting." | |
| exit 1 | |
| fi | |
| echo "✓ Digest verified — pushing exactly what was scanned." | |
| - name: Login to GitHub Container Registry | |
| uses: docker/login-action@v3 | |
| with: | |
| registry: ghcr.io | |
| username: ${{ github.actor }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Push verified image to registry | |
| run: | | |
| OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') | |
| docker tag \ | |
| fieldtrack-backend:${{ steps.meta.outputs.sha_short }} \ | |
| ghcr.io/${OWNER}/fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| docker push ghcr.io/${OWNER}/fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| echo "✓ Pushed ghcr.io/${OWNER}/fieldtrack-backend:${{ steps.meta.outputs.sha_short }}" | |
| # Use the same pinned Trivy image to generate the SBOM — no additional | |
| # tool dependency, no unpinned action, same supply-chain guarantees. | |
| - name: Generate SBOM (CycloneDX) | |
| env: | |
| IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| run: | | |
| docker run --rm \ | |
| -v /var/run/docker.sock:/var/run/docker.sock \ | |
| aquasec/trivy@sha256:91494b87ddc64f62860d52997532643956c24eeee0d0dda317d563c28c8581bc image \ | |
| --format cyclonedx \ | |
| --output /dev/stdout \ | |
| "$IMAGE_NAME" > sbom.json | |
| - name: Upload SBOM artifact | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: sbom-${{ steps.meta.outputs.sha_short }} | |
| path: sbom.json | |
| retention-days: 90 | |
| - name: Save build provenance | |
| env: | |
| IMAGE_NAME: fieldtrack-backend:${{ steps.meta.outputs.sha_short }} | |
| IMAGE_DIGEST: ${{ steps.digest.outputs.digest }} | |
| run: | | |
| echo "commit=${{ github.sha }}" > provenance.txt | |
| echo "ref=${{ github.ref }}" >> provenance.txt | |
| echo "image=${IMAGE_NAME}" >> provenance.txt | |
| echo "digest=${IMAGE_DIGEST}" >> provenance.txt | |
| echo "workflow=${{ github.workflow }}" >> provenance.txt | |
| echo "run_id=${{ github.run_id }}" >> provenance.txt | |
| - name: Upload provenance artifact | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: provenance-${{ steps.meta.outputs.sha_short }} | |
| path: provenance.txt | |
| retention-days: 90 | |
| - name: Build & scan summary | |
| if: always() | |
| env: | |
| IMAGE_DIGEST: ${{ steps.digest.outputs.digest }} | |
| run: | | |
| SBOM_COUNT=$(python3 -c "import json; d=json.load(open('sbom.json')); print(len(d.get('components', [])))" 2>/dev/null || echo 'n/a') | |
| { | |
| echo "## Build · Scan · Push" | |
| echo "| Field | Value |" | |
| echo "|---|---|" | |
| echo "| Commit SHA | \`${{ github.sha }}\` |" | |
| echo "| Image tag | \`fieldtrack-backend:${{ steps.meta.outputs.sha_short }}\` |" | |
| echo "| Image digest | \`${IMAGE_DIGEST}\` |" | |
| echo "| SBOM components | ${SBOM_COUNT} |" | |
| echo "| Trivy gate | HIGH,CRITICAL / exit-code 1 / ignore-unfixed |" | |
| echo "| Registry | ghcr.io/${{ github.repository_owner }}/fieldtrack-backend |" | |
| } >> "$GITHUB_STEP_SUMMARY" | |
| # --------------------------------------------------------------------------- | |
| # JOB: deploy | |
| # | |
| # Blue-Green deployment to VPS via SSH. | |
| # The deploy-bluegreen.sh script manages slot switching and container health. | |
| # --------------------------------------------------------------------------- | |
| deploy: | |
| name: Deploy (Blue-Green SSH) | |
| runs-on: ubuntu-latest | |
| needs: [build-scan-push] | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Validate required deployment secrets | |
| env: | |
| API_BASE_URL: ${{ secrets.API_BASE_URL }} | |
| CORS_ORIGIN: ${{ secrets.CORS_ORIGIN }} | |
| run: | | |
| if [ -z "${API_BASE_URL:-}" ]; then | |
| echo "::error::API_BASE_URL secret is not set. Deployment aborted." | |
| exit 1 | |
| fi | |
| echo "✓ API_BASE_URL is set" | |
| if [ -z "${CORS_ORIGIN:-}" ]; then | |
| echo "::error::CORS_ORIGIN secret is not set. Deployment aborted." | |
| exit 1 | |
| fi | |
| echo "✓ CORS_ORIGIN is set" | |
| - name: Validate environment contract before deploy | |
| uses: appleboy/ssh-action@v1.0.3 | |
| with: | |
| host: ${{ secrets.DO_HOST }} | |
| username: ${{ secrets.DO_USER }} | |
| key: ${{ secrets.DO_SSH_KEY }} | |
| script: | | |
| set -euo pipefail | |
| export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0" | |
| cd "$DEPLOY_ROOT" | |
| git fetch origin | |
| git reset --hard origin/master | |
| chmod +x apps/api/scripts/*.sh | |
| echo "=== Pre-deploy environment validation ===" | |
| ./apps/api/scripts/validate-env.sh --check-monitoring | |
| echo "✓ Environment contract validated" | |
| - name: Blue-Green deploy via SSH | |
| uses: appleboy/ssh-action@v1.0.3 | |
| with: | |
| host: ${{ secrets.DO_HOST }} | |
| username: ${{ secrets.DO_USER }} | |
| key: ${{ secrets.DO_SSH_KEY }} | |
| script: | | |
| set -euo pipefail | |
| T0=$(date +%s) | |
| export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0" | |
| cd "$DEPLOY_ROOT" | |
| chmod +x apps/api/scripts/*.sh | |
| # Environment already validated in previous step | |
| ./apps/api/scripts/deploy-bluegreen.sh "${{ needs.build-scan-push.outputs.sha_short }}" | |
| echo "✓ Deploy completed in $(($(date +%s) - T0))s" | |
| - name: Log deployment state (slot + SHA for debugging) | |
| uses: appleboy/ssh-action@v1.0.3 | |
| if: always() | |
| with: | |
| host: ${{ secrets.DO_HOST }} | |
| username: ${{ secrets.DO_USER }} | |
| key: ${{ secrets.DO_SSH_KEY }} | |
| script: | | |
| ACTIVE_SLOT=$(cat /var/run/fieldtrack/active-slot 2>/dev/null || echo "unknown") | |
| DEPLOY_STATUS="UNKNOWN" | |
| # Check if health endpoint is responding (good sign of successful deploy) | |
| if timeout 5 curl -sf http://127.0.0.1:3000/health >/dev/null 2>&1; then | |
| DEPLOY_STATUS="SUCCESS" | |
| fi | |
| echo "DEPLOY_STATE=$DEPLOY_STATUS| SLOT=$ACTIVE_SLOT | SHA=${{ github.sha }}" | |
| # --------------------------------------------------------------------------- | |
| # JOB: sync-infra | |
| # | |
| # Syncs Nginx config (with slot-aware port substitution). | |
| # Monitoring restarts are handled exclusively by deploy-bluegreen.sh. | |
| # --------------------------------------------------------------------------- | |
| sync-infra: | |
| name: Sync Infrastructure (nginx) | |
| runs-on: ubuntu-latest | |
| needs: [deploy] | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Sync infrastructure configs via SSH | |
| uses: appleboy/ssh-action@v1.0.3 | |
| with: | |
| host: ${{ secrets.DO_HOST }} | |
| username: ${{ secrets.DO_USER }} | |
| key: ${{ secrets.DO_SSH_KEY }} | |
| script: | | |
| set -euo pipefail | |
| T0=$(date +%s) | |
| export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0" | |
| INFRA_DIR="$DEPLOY_ROOT/infra" | |
| NGINX_LIVE="/etc/nginx/sites-enabled/fieldtrack.conf" | |
| ACTIVE_SLOT_FILE="/var/run/fieldtrack/active-slot" | |
| ACTIVE_SLOT=$(cat "$ACTIVE_SLOT_FILE" 2>/dev/null || echo "blue") | |
| if [ "$ACTIVE_SLOT" = "green" ]; then BACKEND_PORT=3002; else BACKEND_PORT=3001; fi | |
| # Load env from apps/api/.env — exports DEPLOY_ROOT, API_HOSTNAME, and all | |
| # app variables. DEPLOY_ROOT is already exported above; load-env.sh uses it. | |
| source "$DEPLOY_ROOT/apps/api/scripts/load-env.sh" | |
| echo "✓ API_HOSTNAME: $API_HOSTNAME" | |
| echo "=== Syncing Nginx (slot: $ACTIVE_SLOT, port: $BACKEND_PORT) ===" | |
| sudo cp "$NGINX_LIVE" /tmp/fieldtrack.conf.bak 2>/dev/null || true | |
| NGINX_TMP=$(mktemp /tmp/fieldtrack-nginx.XXXXXX.conf) | |
| sed \ | |
| -e "s|__BACKEND_PORT__|$BACKEND_PORT|g" \ | |
| -e "s|__API_HOSTNAME__|$API_HOSTNAME|g" \ | |
| "$INFRA_DIR/nginx/fieldtrack.conf" > "$NGINX_TMP" | |
| sudo cp "$NGINX_TMP" "$NGINX_LIVE" | |
| rm -f "$NGINX_TMP" | |
| if ! sudo nginx -t 2>&1; then | |
| echo "Nginx test failed — restoring backup..." | |
| sudo cp /tmp/fieldtrack.conf.bak "$NGINX_LIVE" | |
| exit 1 | |
| fi | |
| sudo systemctl reload nginx | |
| echo "✓ Nginx reloaded." | |
| echo "✓ Infra sync completed in $(($(date +%s) - T0))s" | |
| # --------------------------------------------------------------------------- | |
| # JOB: health-and-smoke | |
| # | |
| # Step 1: Poll /health and /ready until they return 200 (up to 60 s each). | |
| # Step 2: Run the full smoke test suite (login + core API flows). | |
| # Failure here triggers the rollback job automatically. | |
| # --------------------------------------------------------------------------- | |
| health-and-smoke: | |
| name: Health Checks & Smoke Tests | |
| runs-on: ubuntu-latest | |
| needs: [sync-infra] | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v5 | |
| - name: Wait for /health endpoint (via VPS) | |
| uses: appleboy/ssh-action@v1.0.3 | |
| with: | |
| host: ${{ secrets.DO_HOST }} | |
| username: ${{ secrets.DO_USER }} | |
| key: ${{ secrets.DO_SSH_KEY }} | |
| script: | | |
| set -euo pipefail | |
| export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0" | |
| cd "$DEPLOY_ROOT" | |
| source apps/api/scripts/load-env.sh | |
| echo "=== Checking /health via VPS (API_HOSTNAME=$API_HOSTNAME) ===" | |
| for i in $(seq 1 30); do | |
| echo "---- Attempt $i ----" | |
| STATUS=$(curl -sS \ | |
| --resolve "${API_HOSTNAME}:443:127.0.0.1" \ | |
| -o /tmp/resp.txt \ | |
| -w "%{http_code}" \ | |
| https://${API_HOSTNAME}/health \ | |
| --insecure || echo "000") | |
| BODY=$(cat /tmp/resp.txt 2>/dev/null || echo "") | |
| echo "HTTP: $STATUS" | |
| echo "BODY: $BODY" | |
| if [ "$STATUS" = "200" ] && echo "$BODY" | grep -q '"status":"ok"'; then | |
| echo "✓ /health OK (attempt $i)" | |
| exit 0 | |
| fi | |
| sleep 2 | |
| done | |
| echo "❌ /health failed" | |
| exit 1 | |
| - name: Wait for /health endpoint (final public check) | |
| uses: appleboy/ssh-action@v1.0.3 | |
| with: | |
| host: ${{ secrets.DO_HOST }} | |
| username: ${{ secrets.DO_USER }} | |
| key: ${{ secrets.DO_SSH_KEY }} | |
| script: | | |
| set -euo pipefail | |
| export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0" | |
| cd "$DEPLOY_ROOT" | |
| source apps/api/scripts/load-env.sh | |
| echo "=== Final health check via public endpoint (API_HOSTNAME=$API_HOSTNAME) ===" | |
| for i in $(seq 1 10); do | |
| echo "---- Attempt $i ----" | |
| STATUS=$(curl -sS \ | |
| --resolve "${API_HOSTNAME}:443:127.0.0.1" \ | |
| -o /tmp/resp.txt \ | |
| -w "%{http_code}" \ | |
| https://${API_HOSTNAME}/health \ | |
| --insecure || echo "000") | |
| BODY=$(cat /tmp/resp.txt 2>/dev/null || echo "") | |
| echo "HTTP: $STATUS" | |
| echo "BODY: $BODY" | |
| if [ "$STATUS" = "200" ] && echo "$BODY" | grep -q '"status":"ok"'; then | |
| echo "✓ /health OK (attempt $i)" | |
| exit 0 | |
| fi | |
| sleep 2 | |
| done | |
| echo "❌ /health failed" | |
| exit 1 | |
| - name: Run smoke tests | |
| env: | |
| API_BASE_URL: ${{ secrets.API_BASE_URL }} | |
| FT_EMP_EMAIL: ${{ secrets.FT_EMP_EMAIL }} | |
| FT_EMP_PASSWORD: ${{ secrets.FT_EMP_PASSWORD }} | |
| FT_ADMIN_EMAIL: ${{ secrets.FT_ADMIN_EMAIL }} | |
| FT_ADMIN_PASSWORD: ${{ secrets.FT_ADMIN_PASSWORD }} | |
| SUPABASE_URL: ${{ secrets.SUPABASE_URL }} | |
| SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }} | |
| run: | | |
| chmod +x apps/api/scripts/smoke-test.sh | |
| ./apps/api/scripts/smoke-test.sh | |
| - name: Upload smoke test report | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: smoke-test-report-${{ github.sha }} | |
| path: smoke-report.json | |
| retention-days: 30 | |
| - name: Deployment summary | |
| run: | | |
| echo "=====================================================" | |
| echo " Production Deployment: COMPLETE ✅" | |
| echo "=====================================================" | |
| echo " Commit: ${{ github.sha }}" | |
| echo " /health: OK" | |
| echo " /ready: OK" | |
| echo " Smoke: passed" | |
| echo "=====================================================" | |
| # --------------------------------------------------------------------------- | |
| # JOB: rollback | |
| # | |
| # Triggered automatically when deploy, sync-infra, OR health-and-smoke fails. | |
| # Restores the previously healthy Blue-Green slot via the rollback script. | |
| # 'if: always()' ensures this job can evaluate even if upstream jobs failed. | |
| # --------------------------------------------------------------------------- | |
| rollback: | |
| name: Rollback Deployment (auto) | |
| runs-on: ubuntu-latest | |
| needs: [deploy, sync-infra, health-and-smoke] | |
| timeout-minutes: 10 | |
| if: | | |
| always() && | |
| ( | |
| needs.deploy.result == 'failure' || | |
| needs.sync-infra.result == 'failure' || | |
| needs.health-and-smoke.result == 'failure' | |
| ) | |
| steps: | |
| - name: Log rollback trigger | |
| run: | | |
| echo "ROLLBACK_TRIGGERED=TRUE | FAILED_JOBS:" | |
| [ "${{ needs.deploy.result }}" = "failure" ] && echo " - deploy" | |
| [ "${{ needs.sync-infra.result }}" = "failure" ] && echo " - sync-infra" | |
| [ "${{ needs.health-and-smoke.result }}" = "failure" ] && echo " - health-and-smoke" | |
| echo "SHA=${{ github.sha }}" | |
| - name: Rollback on VPS | |
| uses: appleboy/ssh-action@v1.0.3 | |
| with: | |
| host: ${{ secrets.DO_HOST }} | |
| username: ${{ secrets.DO_USER }} | |
| key: ${{ secrets.DO_SSH_KEY }} | |
| script: | | |
| set -euo pipefail | |
| export DEPLOY_ROOT="/home/ashish/FieldTrack-2.0" | |
| cd "$DEPLOY_ROOT" | |
| chmod +x apps/api/scripts/*.sh | |
| ./apps/api/scripts/rollback.sh --auto | |
| # Log final state | |
| ACTIVE_SLOT=$(cat /var/run/fieldtrack/active-slot 2>/dev/null || echo "unknown") | |
| echo "ROLLBACK_COMPLETE | ACTIVE_SLOT=$ACTIVE_SLOT | SHA=${{ github.sha }}" |