Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 33 additions & 20 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -624,23 +624,30 @@ jobs:

# Determine active slot (blue/green)
ACTIVE_SLOT=$(cat /var/run/api/active-slot 2>/dev/null || echo "blue")
if [ "$ACTIVE_SLOT" = "green" ]; then BACKEND_PORT=3002; else BACKEND_PORT=3001; fi

echo "=== API Health Gate (slot: $ACTIVE_SLOT, port: $BACKEND_PORT) ==="

# Poll /ready endpoint (internal readiness probe)
ACTIVE_CONTAINER="api-$ACTIVE_SLOT"

echo "=== API Health Gate (slot: $ACTIVE_SLOT, container: $ACTIVE_CONTAINER) ==="

# Guard: container must exist before we try to reach it
docker inspect "$ACTIVE_CONTAINER" >/dev/null 2>&1 || {
echo "❌ Container $ACTIVE_CONTAINER not found"
exit 1
}

# Poll /ready from inside the active container via docker exec (no host port binding needed)
for i in $(seq 1 15); do
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://127.0.0.1:$BACKEND_PORT/ready" 2>/dev/null || echo "000")
STATUS=$(docker exec "$ACTIVE_CONTAINER" \
curl -s -o /dev/null -w "%{http_code}" "http://localhost:3000/ready" 2>/dev/null || echo "000")
if [ "$STATUS" = "200" ]; then
echo "βœ“ API ready on port $BACKEND_PORT (attempt $i)"
echo "βœ“ API ready (container $ACTIVE_CONTAINER, attempt $i)"
exit 0
fi
echo " Attempt $i: HTTP $STATUS β€” waiting..."
sleep 2
done

echo "❌ API /ready did not return 200 after 30s β€” monitoring sync would fail anyway"
docker logs "api-$ACTIVE_SLOT" --tail 30 2>/dev/null || true
docker logs "$ACTIVE_CONTAINER" --tail 30 2>/dev/null || true
exit 1

# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -673,34 +680,39 @@ jobs:
[ -d "$DEPLOY_ROOT" ] || { echo "❌ DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
cd "$DEPLOY_ROOT"
INFRA_DIR="$DEPLOY_ROOT/infra"
NGINX_LIVE="/etc/nginx/sites-enabled/api.conf"
NGINX_LIVE="$DEPLOY_ROOT/infra/nginx/live/api.conf"
NGINX_BACKUP_DIR="$DEPLOY_ROOT/infra/nginx/backup"
ACTIVE_SLOT_FILE="/var/run/api/active-slot"

ACTIVE_SLOT=$(cat "$ACTIVE_SLOT_FILE" 2>/dev/null || echo "blue")
if [ "$ACTIVE_SLOT" = "green" ]; then BACKEND_PORT=3002; else BACKEND_PORT=3001; fi
ACTIVE_CONTAINER="api-$ACTIVE_SLOT"

# Load env from .env — exports DEPLOY_ROOT, API_HOSTNAME, and all
# app variables. DEPLOY_ROOT is already exported above; load-env.sh uses it.
source "$DEPLOY_ROOT/scripts/load-env.sh"

echo "βœ“ API_HOSTNAME: $API_HOSTNAME"

echo "=== Syncing Nginx (slot: $ACTIVE_SLOT, port: $BACKEND_PORT) ==="
sudo cp "$NGINX_LIVE" /tmp/api.conf.bak 2>/dev/null || true
# Ensure live/backup dirs exist
mkdir -p "$(dirname "$NGINX_LIVE")" "$NGINX_BACKUP_DIR"

echo "=== Syncing Nginx (slot: $ACTIVE_SLOT, container: $ACTIVE_CONTAINER) ==="
cp "$NGINX_LIVE" "$NGINX_BACKUP_DIR/api.conf.bak.$(date +%s)" 2>/dev/null || true
NGINX_TMP=$(mktemp /tmp/fieldtrack-nginx.XXXXXX.conf)
sed \
-e "s|__BACKEND_PORT__|$BACKEND_PORT|g" \
-e "s|__ACTIVE_CONTAINER__|$ACTIVE_CONTAINER|g" \
-e "s|__API_HOSTNAME__|$API_HOSTNAME|g" \
"$INFRA_DIR/nginx/api.conf" > "$NGINX_TMP"
sudo cp "$NGINX_TMP" "$NGINX_LIVE"
cp "$NGINX_TMP" "$NGINX_LIVE"
rm -f "$NGINX_TMP"

if ! sudo nginx -t 2>&1; then
if ! docker exec nginx nginx -t 2>&1; then
echo "Nginx test failed β€” restoring backup..."
sudo cp /tmp/api.conf.bak "$NGINX_LIVE"
LATEST_BAK=$(ls -1t "$NGINX_BACKUP_DIR"/api.conf.bak.* 2>/dev/null | head -1 || true)
[ -n "$LATEST_BAK" ] && cp "$LATEST_BAK" "$NGINX_LIVE"
exit 1
fi
sudo systemctl reload nginx
docker exec nginx nginx -s reload
echo "βœ“ Nginx reloaded."

# ROUTING VALIDATION — Test actual traffic through Nginx
Expand All @@ -718,8 +730,9 @@ jobs:
echo "βœ“ Nginx routing verified (HTTP $ROUTE_STATUS)"
else
echo "❌ Nginx routing broken (HTTP $ROUTE_STATUS expected 200) β€” restoring backup..."
sudo cp /tmp/api.conf.bak "$NGINX_LIVE"
sudo nginx -t 2>&1 && sudo systemctl reload nginx || true
LATEST_BAK=$(ls -1t "$NGINX_BACKUP_DIR"/api.conf.bak.* 2>/dev/null | head -1 || true)
[ -n "$LATEST_BAK" ] && cp "$LATEST_BAK" "$NGINX_LIVE"
docker exec nginx nginx -t 2>&1 && docker exec nginx nginx -s reload || true
exit 1
fi

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ jobs:
if: needs.detect-changes.outputs.infra == 'true'
run: |
sed \
-e 's/__BACKEND_PORT__/3001/g' \
-e 's/__ACTIVE_CONTAINER__/api-blue/g' \
-e 's/__API_HOSTNAME__/api.test.local/g' \
infra/nginx/api.conf > /tmp/nginx.conf

Expand Down
47 changes: 45 additions & 2 deletions infra/docker-compose.monitoring.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ services:
image: prom/prometheus:v2.52.0
container_name: prometheus
restart: unless-stopped
ports:
- "127.0.0.1:9090:9090"
expose:
- "9090"

environment:
- METRICS_SCRAPE_TOKEN=${METRICS_SCRAPE_TOKEN}
Expand Down Expand Up @@ -203,6 +203,49 @@ services:
max-size: "10m"
max-file: "3"

nginx:
image: nginx:1.25-alpine
container_name: nginx
restart: unless-stopped
ports:
- "80:80"
- "443:443"

volumes:
# Rendered nginx config — written by deploy script on each deploy
- ./nginx/live:/etc/nginx/conf.d:ro
# SSL certificates (managed by certbot on the host)
- /etc/ssl/api:/etc/ssl/api:ro
# ACME challenge webroot for certbot renewal
- /var/www/certbot:/var/www/certbot:ro
# Nginx access logs shared with promtail
- /var/log/nginx:/var/log/nginx

networks:
- api_network

depends_on:
grafana:
condition: service_healthy

deploy:
resources:
limits:
memory: 64m

logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"

healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:80/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s

networks:
api_network:
external: true
Expand Down
33 changes: 23 additions & 10 deletions infra/nginx/api.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ map $http_upgrade $connection_upgrade {
'' close;
}

upstream api_backend {
server 127.0.0.1:__BACKEND_PORT__ max_fails=3 fail_timeout=30s;
keepalive 32;
}
# NOTE: No upstream block for api_backend.
# upstream blocks resolve server hostnames at config-load time, which fails
# for Docker service names (api-blue / api-green) that may not exist yet.
# Instead, use a variable + proxy_pass to defer resolution to request time via
# the resolver 127.0.0.11 directive defined in the server block below.

limit_req_zone $binary_remote_addr zone=api_rate:10m rate=60r/s;
limit_req_zone $binary_remote_addr zone=api_health:10m rate=5r/s;
Expand Down Expand Up @@ -73,12 +74,20 @@ geo $realip_remote_addr $is_trusted_source {
198.41.128.0/17 1;
}

# HTTP → HTTPS
# HTTP → HTTPS (with ACME challenge passthrough for certbot renewal)
server {
listen 80;
listen [::]:80;
server_name __API_HOSTNAME__;
return 301 https://$host$request_uri;

# Let certbot serve ACME challenges for certificate renewal
location /.well-known/acme-challenge/ {
root /var/www/certbot;
}

location / {
return 301 https://$host$request_uri;
}
}

# HTTPS SERVER
Expand Down Expand Up @@ -108,6 +117,10 @@ server {
resolver 127.0.0.11 valid=10s;
resolver_timeout 5s;

# Variable-based backend URL — resolved at request time via Docker DNS (127.0.0.11).
# __ACTIVE_CONTAINER__ is substituted with api-blue or api-green by deploy script.
set $api_backend "http://__ACTIVE_CONTAINER__:3000";

# safer host validation (still simple)
if ($host !~* ^(__API_HOSTNAME__|localhost|127\.0\.0\.1)$) {
return 444;
Expand Down Expand Up @@ -163,7 +176,7 @@ server {
# regressions — nginx won't silently change the upstream path.
location = /health {
limit_req zone=api_health burst=10 nodelay;
proxy_pass http://api_backend/health;
proxy_pass $api_backend$request_uri;
proxy_buffering off;
proxy_set_header Host __API_HOSTNAME__;
proxy_set_header X-Forwarded-Host $host;
Expand All @@ -181,7 +194,7 @@ server {
allow ::1;
deny all;
limit_req zone=api_health burst=10 nodelay;
proxy_pass http://api_backend/ready;
proxy_pass $api_backend$request_uri;
proxy_buffering off;
proxy_set_header Host __API_HOSTNAME__;
proxy_set_header X-Forwarded-Host $host;
Expand All @@ -195,7 +208,7 @@ server {
# SSE — open to all origins; application enforces JWT auth
location = /admin/events {
limit_req zone=api_rate burst=10 nodelay;
proxy_pass http://api_backend;
proxy_pass $api_backend$request_uri;
proxy_http_version 1.1;
proxy_set_header Connection '';
proxy_set_header Host __API_HOSTNAME__;
Expand All @@ -210,7 +223,7 @@ server {
# MAIN API — open to all origins; application enforces JWT + RBAC
location / {
limit_req zone=api_rate burst=50 nodelay;
proxy_pass http://api_backend;
proxy_pass $api_backend$request_uri;
proxy_http_version 1.1;
proxy_set_header Host __API_HOSTNAME__;
proxy_set_header X-Forwarded-Host $host;
Expand Down
Loading
Loading