Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .github/workflows/ci-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,15 @@ jobs:
echo "🧪 Running unit tests..."
# Run unit tests for package validation
TEST_RESULTS_DIR="test-results/unit" tests/unit/package-validation.sh
- name: Run Metrics Unit Tests
  if: matrix.test-suite == 'unit'
  run: |
    # Runs the phase-1 metrics unit tests when the script is present in the
    # checkout; a missing script is reported and treated as a skip, not a failure.
    echo "🧪 Running metrics unit tests..."
    metrics_test="tests/unit/test-metrics-phase1.sh"
    if [[ ! -f "$metrics_test" ]]; then
      echo "⚠️ test-metrics-phase1.sh not found, skipping"
      exit 0
    fi
    # The script's exit status becomes the step's status.
    bash "$metrics_test"
- name: Run Integration Tests
if: matrix.test-suite == 'integration'
run: |
Expand Down Expand Up @@ -919,6 +928,41 @@ jobs:
echo "✅ Integration tests passed"
echo "PASSED" > test-results/integration/status.txt
fi
- name: Run Prometheus Metrics Tests
  if: matrix.test-suite == 'integration'
  run: |
    # Runs every metrics integration script, capturing each script's output in
    # its own log file. All scripts are attempted even if an earlier one fails;
    # the step fails at the end when at least one script returned non-zero.
    echo "🧪 Running Prometheus metrics integration tests..."
    log_dir="test-results/integration/metrics"
    mkdir -p "$log_dir"
    failed=0
    # Phase 6 static analysis tests (no containers needed)
    for script in \
      "tests/integration/test-metrics-endpoint.sh" \
      "tests/integration/test-metrics-performance.sh" \
      "tests/integration/test-metrics-persistence.sh" \
      "tests/integration/test-metrics-scaling.sh" \
      "tests/integration/test-metrics-security.sh" \
      "tests/integration/test-docs-validation.sh"
    do
      name="$(basename "$script" .sh)"
      log_file="$log_dir/$name.log"
      echo "Running $name..."
      if ! bash "$script" > "$log_file" 2>&1; then
        echo "❌ $name failed"
        # Surface only the tail of the log inline; the full log stays on disk.
        echo "--- Output ---"
        tail -20 "$log_file"
        echo "--- End ---"
        failed=$((failed + 1))
        continue
      fi
      echo "✅ $name passed"
    done
    echo "Prometheus metrics tests completed. Errors: $failed"
    if [[ $failed -eq 0 ]]; then
      echo "✅ All Prometheus metrics tests passed"
      exit 0
    fi
    echo "❌ Prometheus metrics tests failed ($failed errors)"
    exit 1
- name: Run Docker Package Validation
if: matrix.test-suite == 'docker-validation'
run: |
Expand Down
163 changes: 151 additions & 12 deletions .github/workflows/security-advisories.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@ on:
- "MEDIUM"
- "LOW"
scan_targets:
description: "Scan targets (comma-separated: filesystem,container,chrome)"
description: "Scan targets (comma-separated: filesystem,container,chrome,chrome-go)"
required: false
default: "filesystem,container,chrome"
default: "filesystem,container,chrome,chrome-go"
type: string

# All runs of this workflow for the same ref share one concurrency group;
# with cancel-in-progress enabled, a newly started run cancels the run of that
# group that is still in progress.
concurrency:
group: security-advisories-${{ github.ref }}
cancel-in-progress: true

env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
Expand All @@ -32,7 +36,7 @@ jobs:
permissions:
contents: read
security-events: write
repository-projects: write
issues: write
packages: read

steps:
Expand All @@ -52,8 +56,19 @@ jobs:
- name: Set up scan parameters
  id: params
  env:
    # Inputs are handed to the script through the environment instead of being
    # interpolated into the script text, so their content is never parsed as
    # shell syntax. Both fall back to defaults when no dispatch input exists.
    SELECTED_SEVERITY: ${{ github.event.inputs.severity_filter || 'HIGH' }}
    REQUESTED_TARGETS: ${{ github.event.inputs.scan_targets || 'filesystem,container,chrome,chrome-go' }}
  run: |
    # Outputs:
    #   severity_filter - selected minimum severity (normalized to HIGH if unknown)
    #   severity_list   - explicit comma-separated list passed to Trivy
    #   scan_targets    - comma-separated scan targets
    #   timestamp       - UTC timestamp of this run (YYYYmmdd-HHMMSS)
    #
    # Build the correct severity list from the selected minimum level upward
    # Trivy --severity takes an explicit comma-separated list, not a "minimum" directive
    case "$SELECTED_SEVERITY" in
      LOW)      SEVERITY_LIST="LOW,MEDIUM,HIGH,CRITICAL" ;;
      MEDIUM)   SEVERITY_LIST="MEDIUM,HIGH,CRITICAL" ;;
      HIGH)     SEVERITY_LIST="HIGH,CRITICAL" ;;
      CRITICAL) SEVERITY_LIST="CRITICAL" ;;
      *)
        # Keep the reported filter consistent with the list that is actually used.
        echo "::warning::Unknown severity filter '$SELECTED_SEVERITY'; falling back to HIGH"
        SELECTED_SEVERITY="HIGH"
        SEVERITY_LIST="HIGH,CRITICAL"
        ;;
    esac
    {
      echo "severity_filter=$SELECTED_SEVERITY"
      echo "severity_list=$SEVERITY_LIST"
      echo "scan_targets=$REQUESTED_TARGETS"
      echo "timestamp=$(date -u '+%Y%m%d-%H%M%S')"
    } >> "$GITHUB_OUTPUT"

- name: Create results directory
Expand All @@ -71,7 +86,7 @@ jobs:
scan-ref: "."
format: "sarif"
output: "trivy-results/filesystem.sarif"
severity: ${{ steps.params.outputs.severity_filter }},CRITICAL
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true

- name: Upload filesystem scan to Security tab
Expand All @@ -90,7 +105,7 @@ jobs:
scan-ref: "."
format: "json"
output: "trivy-results/filesystem.json"
severity: ${{ steps.params.outputs.severity_filter }},CRITICAL
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true

# Container vulnerability scan
Expand Down Expand Up @@ -128,7 +143,7 @@ jobs:
image-ref: "github-runner:scan"
format: "sarif"
output: "trivy-results/container.sarif"
severity: ${{ steps.params.outputs.severity_filter }},CRITICAL
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true
continue-on-error: false

Expand All @@ -147,7 +162,7 @@ jobs:
image-ref: "github-runner:scan"
format: "json"
output: "trivy-results/container.json"
severity: ${{ steps.params.outputs.severity_filter }},CRITICAL
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true

- name: Cleanup standard runner image
Expand Down Expand Up @@ -190,7 +205,7 @@ jobs:
image-ref: "github-runner-chrome:scan"
format: "sarif"
output: "trivy-results/chrome.sarif"
severity: ${{ steps.params.outputs.severity_filter }},CRITICAL
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true
continue-on-error: false

Expand All @@ -209,14 +224,77 @@ jobs:
image-ref: "github-runner-chrome:scan"
format: "json"
output: "trivy-results/chrome.json"
severity: ${{ steps.params.outputs.severity_filter }},CRITICAL
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true

# Removes the locally built Chrome scan image and prunes unused Docker data to
# free disk space, then prints disk usage. Both docker commands are best-effort
# ("|| true"), so a missing image or nothing to prune does not fail the step.
# NOTE(review): contains() on a string is a substring match, so this condition
# is also true when scan_targets lists only 'chrome-go' — confirm that is intended.
- name: Cleanup Chrome runner image
if: contains(steps.params.outputs.scan_targets, 'chrome')
run: |
echo "Cleaning up Chrome runner image to free space..."
docker rmi github-runner-chrome:scan || true
docker system prune -f || true
echo "Disk space after cleanup:"
df -h

# Chrome-Go runner container scan
# Builds docker/Dockerfile.chrome-go without pushing (push: false) and loads the
# result into the local Docker daemon (load: true) under the tag
# github-runner-chrome-go:scan, which the later Chrome-Go steps reference.
# Build layers are cached in the GitHub Actions cache under a scope dedicated to
# this image.
- name: Build Chrome-Go runner image for scanning
if: contains(steps.params.outputs.scan_targets, 'chrome-go')
uses: docker/build-push-action@v6
with:
context: ./docker
file: ./docker/Dockerfile.chrome-go
push: false
tags: github-runner-chrome-go:scan
load: true
cache-from: type=gha,scope=advisory-chrome-go-runner
cache-to: type=gha,mode=max,scope=advisory-chrome-go-runner

- name: Verify Chrome-Go image exists
  if: contains(steps.params.outputs.scan_targets, 'chrome-go')
  run: |
    # Fails the job early when the scan image is not present in the local
    # Docker daemon, instead of letting the Trivy step fail later.
    echo "Checking if Chrome-Go image exists..."
    docker images github-runner-chrome-go:scan
    if docker image inspect github-runner-chrome-go:scan >/dev/null 2>&1; then
      echo "✅ Image github-runner-chrome-go:scan found"
    else
      echo "❌ Image github-runner-chrome-go:scan not found"
      exit 1
    fi

# Scans the locally built github-runner-chrome-go:scan image with Trivy and
# writes a SARIF report to trivy-results/chrome-go.sarif, limited to the
# severities in the params step's severity_list output.
# skip-setup-trivy: true — presumably Trivy is installed by an earlier step that
# is not visible here; TODO confirm.
# NOTE(review): continue-on-error: false is the default and could be dropped.
# NOTE(review): consider pinning third-party actions to a full commit SHA
# rather than a mutable tag.
- name: Run Trivy Chrome-Go container scan
if: contains(steps.params.outputs.scan_targets, 'chrome-go')
uses: aquasecurity/trivy-action@0.34.1
with:
image-ref: "github-runner-chrome-go:scan"
format: "sarif"
output: "trivy-results/chrome-go.sarif"
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true
continue-on-error: false

# Uploads the Chrome-Go SARIF report to GitHub code scanning under its own
# category. continue-on-error: true means an upload failure does not fail the
# job.
- name: Upload Chrome-Go scan to Security tab
if: contains(steps.params.outputs.scan_targets, 'chrome-go')
uses: github/codeql-action/upload-sarif@v4
with:
sarif_file: "trivy-results/chrome-go.sarif"
category: "advisory-chrome-go-container-scan"
continue-on-error: true

# Runs Trivy a second time against the same image, this time emitting JSON to
# trivy-results/chrome-go.json with the same severity list as the SARIF scan.
# Presumably this file feeds the jq-based summary step further down — TODO
# confirm against that step.
- name: Generate Chrome-Go JSON report
if: contains(steps.params.outputs.scan_targets, 'chrome-go')
uses: aquasecurity/trivy-action@0.34.1
with:
image-ref: "github-runner-chrome-go:scan"
format: "json"
output: "trivy-results/chrome-go.json"
severity: ${{ steps.params.outputs.severity_list }}
skip-setup-trivy: true

# Generate comprehensive security summary
- name: Install jq for JSON processing
  run: |
    # Ensures jq is on PATH for the JSON processing that follows. jq is often
    # already present (e.g. on GitHub-hosted Ubuntu images), so only touch apt
    # when it is missing; this avoids a slow, network-dependent apt-get update
    # on every run.
    if command -v jq >/dev/null 2>&1; then
      echo "jq already available: $(jq --version)"
    else
      sudo apt-get update
      sudo apt-get install -y jq
    fi

- name: Generate Security Summary
id: summary
run: |
echo "## 🔒 Security Scan Summary" >> $GITHUB_STEP_SUMMARY
echo "📅 **Scan Date**: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
Expand Down Expand Up @@ -285,6 +363,66 @@ jobs:
echo "high-count=$total_high" >> $GITHUB_OUTPUT
echo "total-count=$total_all" >> $GITHUB_OUTPUT

# Create GitHub issue when critical vulnerabilities are found
- name: Create issue for critical vulnerabilities
  if: steps.summary.outputs.critical-count > 0
  uses: actions/github-script@v8
  with:
    script: |
      // Opens a tracking issue labelled security+critical when the summary step
      // reported at least one CRITICAL finding, unless such an issue is already open.

      // Counts from the summary step, interpolated by Actions as strings.
      const counts = {
        critical: '${{ steps.summary.outputs.critical-count }}',
        high: '${{ steps.summary.outputs.high-count }}',
        total: '${{ steps.summary.outputs.total-count }}',
      };
      const { owner, repo } = context.repo;
      const repoUrl = `https://github.com/${owner}/${repo}`;
      const runUrl = `${repoUrl}/actions/runs/${context.runId}`;
      const securityUrl = `${repoUrl}/security`;
      const [today] = new Date().toISOString().split('T');

      // Check for existing open issue to avoid duplicates. The match is on a
      // fixed title fragment, so an open issue with a different count still counts.
      const openIssues = await github.rest.issues.listForRepo({
        owner,
        repo,
        state: 'open',
        labels: 'security,critical',
      });
      for (const issue of openIssues.data) {
        if (issue.title.includes('CRITICAL vulnerabilities detected')) {
          console.log('⚠️ An open critical vulnerability issue already exists — skipping creation.');
          return;
        }
      }

      const bodyLines = [
        `## 🚨 Critical Vulnerabilities Detected`,
        ``,
        `The weekly security advisory scan found **${counts.critical} CRITICAL** vulnerabilities that require immediate attention.`,
        ``,
        `| Severity | Count |`,
        `|----------|-------|`,
        `| 🔴 Critical | ${counts.critical} |`,
        `| 🟠 High | ${counts.high} |`,
        `| **Total** | **${counts.total}** |`,
        ``,
        `### Actions Required`,
        `1. Review the [Security tab](${securityUrl}) for full details`,
        `2. Check the [workflow run](${runUrl}) for scan artifacts`,
        `3. Prioritize and remediate CRITICAL findings immediately`,
        `4. Close this issue once all CRITICAL vulnerabilities are resolved`,
        ``,
        `---`,
        `*Automatically created by the Security Advisory Management workflow on ${today}.*`,
      ];

      await github.rest.issues.create({
        owner,
        repo,
        title: `🔴 ${counts.critical} CRITICAL vulnerabilities detected`,
        labels: ['security', 'critical'],
        body: bodyLines.join('\n'),
      });

      console.log(`✅ Created critical vulnerability issue with ${counts.critical} CRITICAL findings.`);

- name: Upload Security Reports
uses: actions/upload-artifact@v6
with:
Expand All @@ -306,7 +444,7 @@ jobs:

## 📊 Scan Configuration

- **Severity Filter**: ${{ steps.params.outputs.severity_filter }} and above
- **Severity Filter**: ${{ steps.params.outputs.severity_filter }} and above (${{ steps.params.outputs.severity_list }})
- **Scan Targets**: ${{ steps.params.outputs.scan_targets }}
- **Scanner**: Trivy (Aqua Security)

Expand All @@ -316,6 +454,7 @@ jobs:
- Filesystem dependencies and packages
- Docker container images (standard runner)
- Docker container images (Chrome runner)
- Docker container images (Chrome-Go runner)

Results are uploaded to GitHub's Security tab for detailed analysis and remediation tracking.

Expand Down
39 changes: 32 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -402,19 +402,44 @@ docker compose -f docker/docker-compose.chrome.yml up -d

## 📊 Monitoring

### Health Checks
All runner types expose Prometheus-compatible metrics on port **9091** (container port). See the [Monitoring Quick Start](docs/features/PROMETHEUS_QUICKSTART.md) to get started in 5 minutes.

### Metrics Endpoint

```bash
# Check runner health
curl http://localhost:8080/health
# Standard runner metrics (host port 9091)
curl http://localhost:9091/metrics

# Prometheus metrics
curl http://localhost:9090/metrics
# Chrome runner metrics (host port 9092)
curl http://localhost:9092/metrics

# Grafana dashboard
open http://localhost:3000
# Chrome-Go runner metrics (host port 9093)
curl http://localhost:9093/metrics
```

### Grafana Dashboards

Four pre-built dashboards are provided in `monitoring/grafana/dashboards/`:

| Dashboard | File | Panels |
|---|---|---|
| Runner Overview | `runner-overview.json` | 12 |
| DORA Metrics | `dora-metrics.json` | 12 |
| Performance Trends | `performance-trends.json` | 14 |
| Job Analysis | `job-analysis.json` | 16 |

Import them into your Grafana instance or use the provisioning config for auto-loading.

### Documentation

- [Quick Start](docs/features/PROMETHEUS_QUICKSTART.md) — 5-minute setup
- [Setup Guide](docs/features/PROMETHEUS_SETUP.md) — Full configuration
- [Usage Guide](docs/features/PROMETHEUS_USAGE.md) — PromQL queries and alerts
- [Metrics Reference](docs/features/PROMETHEUS_METRICS_REFERENCE.md) — All metric definitions
- [Architecture](docs/features/PROMETHEUS_ARCHITECTURE.md) — System internals
- [Troubleshooting](docs/features/PROMETHEUS_TROUBLESHOOTING.md) — Common issues
- [API Reference](docs/API.md) — Endpoint details

## 🔧 Maintenance

### Scaling
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.5.0
2.6.0
15 changes: 14 additions & 1 deletion config/chrome-go-runner.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,19 @@ GO_TEST_TIMEOUT=10m
# Go Module Cache
GOMODCACHE=/home/runner/go/pkg/mod

# ==========================================
# OPTIONAL: Prometheus Metrics Configuration
# ==========================================

# Runner type label exposed in Prometheus metrics
# RUNNER_TYPE=chrome-go

# Port for the Prometheus metrics endpoint (container-internal; host port mapped in docker-compose)
# METRICS_PORT=9091

# Interval in seconds between metrics collection updates
# METRICS_UPDATE_INTERVAL=30

# ==========================================
# PERFORMANCE AND RESOURCE CONFIGURATION
# ==========================================
Expand Down Expand Up @@ -164,4 +177,4 @@ RUNNER_WORKDIR=/home/runner/_work
# For performance issues:
# - Adjust memory limits based on available resources
# - Consider using RUNNER_EPHEMERAL=true for cleaner runs
# - Monitor resource usage with docker stats
# - Monitor resource usage with docker stats
Loading