Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions .github/workflows/codeql-deep.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
name: "CodeQL — Deep Scan (post-merge)"
# Runs after every merge to master AND on a weekly schedule.
# Uses the full security-and-quality query suite — significantly more thorough
# than the PR lightweight scan.
#
# DOES NOT block the Deploy pipeline. Both workflows trigger independently on
# a master push; deploy.yml never depends on this workflow. Results are
# uploaded to the GitHub Security tab for async review.
#
# If critical issues are found, the security team should open a tracking issue
# and gate the next deployment manually. This workflow itself never fails the
# deploy unless an operator explicitly adds it as a required check.

on:
  push:
    branches: ["master"]
  schedule:
    # Every Monday at 03:15 UTC — offset from midnight to avoid GHA congestion.
    - cron: "15 3 * * 1"

# Do not cancel in-progress deep scans — let them complete for full coverage.
concurrency:
  group: codeql-deep-${{ github.ref }}
  cancel-in-progress: false

# Least privilege: read-only access plus permission to upload scan results.
permissions:
  actions: read
  contents: read
  security-events: write

jobs:
  analyze-deep:
    name: Deep Analyze (CodeQL)
    runs-on: ubuntu-latest
    timeout-minutes: 40

    strategy:
      fail-fast: false
      matrix:
        language: ["javascript"]

    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Setup Node.js (match production)
        uses: actions/setup-node@v5
        with:
          # Quoted so the version is always read as a string, never a number.
          node-version: "24"
          cache: npm
          cache-dependency-path: package-lock.json

      - name: Install dependencies
        run: npm ci

      # Best-effort build: a failure must not stop the scan, but it should be
      # visible. continue-on-error marks the step as failed in the UI while
      # letting the job proceed, instead of hiding the failure with `|| true`.
      - name: Build API
        continue-on-error: true
        run: npm run build

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v4
        with:
          languages: ${{ matrix.language }}
          # Full suite: security + quality + style rules.
          # Catches OWASP Top-10 plus code-quality issues that may hide security risks.
          queries: security-and-quality

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v4
        with:
          category: "codeql-deep"
          # Upload unconditionally — results land in the Security tab regardless
          # of whether any alerts are found.
          upload: always

      # Runs even when earlier steps failed so every run leaves a summary.
      - name: Write deep-scan summary
        if: always()
        run: |
          {
            echo "## CodeQL Deep Scan"
            echo "| Field | Value |"
            echo "|---|---|"
            echo "| Commit | \`${{ github.sha }}\` |"
            echo "| Ref | \`${{ github.ref }}\` |"
            echo "| Query suite | \`security-and-quality\` |"
            echo "| Results | [Security tab](${{ github.server_url }}/${{ github.repository }}/security/code-scanning) |"
          } >> "$GITHUB_STEP_SUMMARY"
30 changes: 16 additions & 14 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
name: "CodeQL Security Scan"
name: "CodeQL β€” PR Scan (lightweight)"
# Runs on every PR to master. Fast feedback: security-extended queries only.
# The deep security-and-quality scan runs separately in codeql-deep.yml after
# a merge lands on master and does NOT block this pipeline.

on:
push:
branches: ["master"]
pull_request:
branches: ["master"]
schedule:
- cron: "0 3 * * 1"

# Cancel in-flight scans for the same PR when new commits are pushed.
concurrency:
group: codeql-${{ github.ref }}
group: codeql-pr-${{ github.event.pull_request.number }}
cancel-in-progress: true

permissions:
Expand All @@ -21,7 +21,7 @@ jobs:
analyze:
name: Analyze (CodeQL)
runs-on: ubuntu-latest
timeout-minutes: 25
timeout-minutes: 15

strategy:
fail-fast: false
Expand All @@ -32,29 +32,31 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v5

# βœ… Match production runtime
- name: Setup Node.js (match production)
uses: actions/setup-node@v5
with:
node-version: 24
cache: npm
cache-dependency-path: package-lock.json

# βœ… Install ALL dependencies
- name: Install dependencies
run: npm ci

# βœ… Build API (critical for CodeQL flow analysis)
# Build so CodeQL can trace data flows through compiled output.
- name: Build API
run: npm run build || true

# βœ… Initialize CodeQL AFTER dependencies
# Initialize AFTER install + build so the database includes all sources.
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
with:
languages: ${{ matrix.language }}
queries: security-and-quality
# security-extended: broader than the default security set but
# significantly faster than security-and-quality (no style/quality rules).
# Catches OWASP Top-10 class issues without slowing PR feedback.
queries: security-extended

# βœ… Analyze
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
uses: github/codeql-action/analyze@v4
with:
category: "codeql-pr"
157 changes: 152 additions & 5 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#
# Parallel stages:
# validate ─┐
# test-api β”œβ”€β–Ί build-scan-push ─► deploy ─► sync-infra ─► health-and-smoke
# β”˜ β”‚
# test-api β”œβ”€β–Ί build-scan-push ─► deploy ─► api-health-gate ─► sync-infra ─► sync-monitoring ─► health-and-smoke
# β”˜ β”‚
# rollback β—„β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ (on failure)

name: Deploy to Production
Expand Down Expand Up @@ -517,6 +517,21 @@ jobs:
fi
echo "βœ“ CORS_ORIGIN is set"

- name: Log deployment metadata and trigger info
  # Writes a summary table for this deployment and one audit line to the log.
  #
  # User-influenced values (commit message, actor, branch name) are passed via
  # env rather than spliced into the script with ${{ }}. Inline expressions are
  # expanded before bash runs, so a commit message containing backticks,
  # quotes or $(...) would otherwise execute as shell code on the runner.
  env:
    COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
    ACTOR: ${{ github.actor }}
    BRANCH: ${{ github.ref_name }}
  run: |
    # Keep only the subject line and escape pipes so a multi-line or
    # pipe-containing message cannot break the Markdown table.
    subject=${COMMIT_MESSAGE%%$'\n'*}
    subject=${subject//|/\\|}
    {
      echo "## Deployment Initiated"
      echo "| Field | Value |"
      echo "|---|---|"
      echo "| Commit SHA | \`${{ github.sha }}\` |"
      echo "| Trigger event | ${{ github.event_name }} |"
      echo "| Triggered by | $ACTOR |"
      echo "| Branch | $BRANCH |"
      echo "| Workflow run | [${{ github.run_id }}](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) |"
      echo "| Commit message | \`$subject\` |"
    } >> "$GITHUB_STEP_SUMMARY"
    echo "📋 Deployment initiated — SHA=${{ github.sha }} EVENT=${{ github.event_name }} ACTOR=$ACTOR RUN=${{ github.run_id }}"

- name: Validate environment contract before deploy
uses: appleboy/ssh-action@v1.0.3
with:
Expand Down Expand Up @@ -581,6 +596,53 @@ jobs:

echo "DEPLOY_STATE=$DEPLOY_STATUS| SLOT=$ACTIVE_SLOT | SHA=${{ github.sha }}"

# ---------------------------------------------------------------------------
# JOB: api-health-gate (Step E+)
#
# Early API health validation — runs AFTER deploy but BEFORE infra sync.
# Ensures the API container is truly healthy before we sync monitoring/nginx.
# If the API is not healthy at this point, STOP before touching infra.
# ---------------------------------------------------------------------------
api-health-gate:
  name: API Health Gate
  runs-on: ubuntu-latest
  needs: [deploy]
  timeout-minutes: 5
  steps:
    - name: Verify API container is healthy before infra sync
      uses: appleboy/ssh-action@v1.0.3
      with:
        host: ${{ secrets.DO_HOST }}
        username: ${{ secrets.DO_USER }}
        key: ${{ secrets.DO_SSH_KEY }}
        script: |
          set -euo pipefail
          export DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
          [ -d "$DEPLOY_ROOT" ] || { echo "❌ DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
          cd "$DEPLOY_ROOT"
          source scripts/load-env.sh

          # Determine active slot (blue/green); defaults to blue when the
          # slot file is missing or unreadable.
          ACTIVE_SLOT=$(cat /var/run/api/active-slot 2>/dev/null || echo "blue")
          if [ "$ACTIVE_SLOT" = "green" ]; then BACKEND_PORT=3002; else BACKEND_PORT=3001; fi

          echo "=== API Health Gate (slot: $ACTIVE_SLOT, port: $BACKEND_PORT) ==="

          # Poll /ready endpoint (internal readiness probe).
          # Worst case: 15 x (5s curl timeout + 2s sleep), inside the 5-minute job timeout.
          MAX_ATTEMPTS=15
          for i in $(seq 1 "$MAX_ATTEMPTS"); do
            # curl itself prints 000 when the connection fails, so the fallback
            # must not echo another 000 (that produced "000000" in the logs).
            # --max-time bounds each probe so a hung socket cannot stall the loop.
            STATUS=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "http://127.0.0.1:$BACKEND_PORT/ready" 2>/dev/null || true)
            STATUS="${STATUS:-000}"
            if [ "$STATUS" = "200" ]; then
              echo "✓ API ready on port $BACKEND_PORT (attempt $i)"
              exit 0
            fi
            echo "  Attempt $i: HTTP $STATUS — waiting..."
            sleep 2
          done

          echo "❌ API /ready did not return 200 after $MAX_ATTEMPTS attempts — monitoring sync would fail anyway"
          # Best-effort diagnostics; never mask the real failure below.
          docker logs "api-$ACTIVE_SLOT" --tail 30 2>/dev/null || true
          exit 1

# ---------------------------------------------------------------------------
# JOB: sync-infra
#
Expand All @@ -590,7 +652,7 @@ jobs:
sync-infra:
name: Sync Infrastructure (nginx)
runs-on: ubuntu-latest
needs: [deploy]
needs: [api-health-gate]
timeout-minutes: 10
steps:
- name: Sync infrastructure configs via SSH
Expand Down Expand Up @@ -641,8 +703,89 @@ jobs:
sudo systemctl reload nginx
echo "βœ“ Nginx reloaded."

# ROUTING VALIDATION — Test actual traffic through Nginx
# Config syntax is valid (nginx -t) but routing may still be broken.
# Test by requesting /health for the real hostname, pinned to localhost.
echo "=== Testing Nginx routing (localhost + Host header) ==="
sleep 2  # Give Nginx a moment to fully apply reload

# The URL must use $API_HOSTNAME for --resolve to apply: curl only honours
# the pin when the URL host matches it. With the hostname in the URL, curl
# also sends the matching SNI and Host header, like a real client would.
# curl prints 000 itself on connection failure, so the fallback only has to
# swallow the non-zero exit (echoing another 000 produced "000000").
ROUTE_STATUS=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
  --resolve "$API_HOSTNAME:443:127.0.0.1" \
  "https://$API_HOSTNAME/health" --insecure 2>/dev/null || true)
ROUTE_STATUS="${ROUTE_STATUS:-000}"

if [ "$ROUTE_STATUS" = "200" ]; then
  echo "✓ Nginx routing verified (HTTP $ROUTE_STATUS)"
else
  echo "❌ Nginx routing broken (HTTP $ROUTE_STATUS expected 200) — restoring backup..."
  sudo cp /tmp/api.conf.bak "$NGINX_LIVE"
  # Best-effort restore: reload only if the restored config validates.
  sudo nginx -t 2>&1 && sudo systemctl reload nginx || true
  exit 1
fi

echo "βœ“ Infra sync completed in $(($(date +%s) - T0))s"

# ---------------------------------------------------------------------------
# JOB: sync-monitoring (Step F)
#
# Idempotent monitoring stack sync — runs after every deploy.
# Delegates to scripts/monitoring-sync.sh which:
#   - Self-heals missing .env.monitoring from example
#   - Creates api_network if absent
#   - Renders alertmanager.rendered.yml
#   - Runs docker compose up -d
#   - Validates prometheus / alertmanager / grafana health
# Monitoring is REQUIRED — deploy fails if any required container is unhealthy.
# ---------------------------------------------------------------------------
sync-monitoring:
  name: Sync Monitoring Stack
  runs-on: ubuntu-latest
  needs: [sync-infra]
  timeout-minutes: 15
  steps:
    - name: Sync and validate monitoring stack via SSH
      uses: appleboy/ssh-action@v1.0.3
      with:
        host: ${{ secrets.DO_HOST }}
        username: ${{ secrets.DO_USER }}
        key: ${{ secrets.DO_SSH_KEY }}
        script: |
          set -euo pipefail
          export DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
          [ -d "$DEPLOY_ROOT" ] || { echo "❌ DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
          cd "$DEPLOY_ROOT"
          chmod +x scripts/monitoring-sync.sh
          ./scripts/monitoring-sync.sh

    # Static table of the containers this job treats as required. It runs on
    # failure too and does not report actual container health.
    - name: Monitoring sync summary
      if: always()
      run: |
        {
          echo "## Monitoring Sync"
          echo "| Container | Required |"
          echo "|---|---|"
          echo "| prometheus | ✅ |"
          echo "| alertmanager | ✅ |"
          echo "| grafana | ✅ |"
        } >> "$GITHUB_STEP_SUMMARY"

    - name: Deployment artifact traceability
      if: always()
      # The commit message is user-controlled, so it is passed via env and
      # never spliced into the script with ${{ }} (which would let backticks
      # or $(...) in a message run as shell code on the runner).
      # JOB_STATUS records the outcome because this step also runs on failure.
      env:
        COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
        JOB_STATUS: ${{ job.status }}
      run: |
        # Subject line only, pipes escaped, so the Markdown table stays intact.
        subject=${COMMIT_MESSAGE%%$'\n'*}
        subject=${subject//|/\\|}
        {
          echo "## Deployment Artifacts"
          echo "| Field | Value |"
          echo "|---|---|"
          echo "| Deployment SHA | \`${{ github.sha }}\` |"
          # The needs context only holds direct dependencies (sync-infra), so
          # needs.get-metadata.outputs.sha_short was always empty here and the
          # tag always fell back to the full SHA. Use it directly.
          # NOTE(review): this row says fieldtrack-api:<sha> while the audit
          # line below says ghcr.io/<owner>/api:<sha> — confirm the real name.
          echo "| Image Tag | \`fieldtrack-api:${{ github.sha }}\` |"
          echo "| Workflow Run | [\#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) |"
          echo "| Triggered By | \`${{ github.event_name }}\` |"
          echo "| Commit Message | \`$subject\` |"
          echo "| Job Status | \`$JOB_STATUS\` |"
        } >> "$GITHUB_STEP_SUMMARY"

        # Also output to logs for audit trail. STATUS is appended because this
        # line is emitted even when the monitoring sync failed.
        echo "DEPLOYMENT_COMPLETE: SHA=${{ github.sha }} IMAGE=ghcr.io/${{ github.repository_owner }}/api:${{ github.sha }} RUN=${{ github.run_id }} STATUS=$JOB_STATUS"

# ---------------------------------------------------------------------------
# JOB: health-and-smoke
#
Expand All @@ -653,7 +796,7 @@ jobs:
health-and-smoke:
name: Health Checks & Smoke Tests
runs-on: ubuntu-latest
needs: [sync-infra]
needs: [sync-infra, sync-monitoring]
timeout-minutes: 15
steps:
- name: Checkout
Expand Down Expand Up @@ -767,21 +910,25 @@ jobs:
rollback:
name: Rollback Deployment (auto)
runs-on: ubuntu-latest
needs: [deploy, sync-infra, health-and-smoke]
needs: [deploy, api-health-gate, sync-infra, sync-monitoring, health-and-smoke]
timeout-minutes: 10
if: |
always() &&
(
needs.deploy.result == 'failure' ||
needs.api-health-gate.result == 'failure' ||
needs.sync-infra.result == 'failure' ||
needs.sync-monitoring.result == 'failure' ||
needs.health-and-smoke.result == 'failure'
)
steps:
- name: Log rollback trigger
run: |
echo "ROLLBACK_TRIGGERED=TRUE | FAILED_JOBS:"
[ "${{ needs.deploy.result }}" = "failure" ] && echo " - deploy"
[ "${{ needs.api-health-gate.result }}" = "failure" ] && echo " - api-health-gate"
[ "${{ needs.sync-infra.result }}" = "failure" ] && echo " - sync-infra"
[ "${{ needs.sync-monitoring.result }}" = "failure" ] && echo " - sync-monitoring"
[ "${{ needs.health-and-smoke.result }}" = "failure" ] && echo " - health-and-smoke"
echo "SHA=${{ github.sha }}"

Expand Down
Loading
Loading