diff --git a/codebundles/mongodb-atlas-operations-health/.runwhen/generation-rules/mongodb-atlas-operations-health.yaml b/codebundles/mongodb-atlas-operations-health/.runwhen/generation-rules/mongodb-atlas-operations-health.yaml new file mode 100644 index 00000000..2189fc4a --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/.runwhen/generation-rules/mongodb-atlas-operations-health.yaml @@ -0,0 +1,22 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + platform: mongodb_atlas + generationRules: + - resourceTypes: + - mongodb_atlas_project + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: + - baseName: atlas-operations-health + levelOfDetail: basic + qualifiers: ["organization", "project"] + baseTemplateName: mongodb-atlas-operations-health + outputItems: + - type: slx + - type: sli + - type: runbook + templateName: mongodb-atlas-operations-health-taskset.yaml diff --git a/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-sli.yaml b/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-sli.yaml new file mode 100644 index 00000000..acbfc3dc --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-sli.yaml @@ -0,0 +1,46 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: Health Score + displayUnitsShort: score + locations: + - {{default_location}} + description: Computes a 0–1 MongoDB Atlas operations posture score (alerts, backup coverage, network baseline) for the discovered project. 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/mongodb-atlas-operations-health/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 300 + configProvided: + - name: ATLAS_PROJECT_ID + value: "{{ match_resource.resource.project_id | default(match_resource.id) }}" + - name: ATLAS_ORG_ID + value: "{{ match_resource.resource.organization_id | default('') }}" + - name: CLUSTER_FILTER + value: "{{ custom.atlas_cluster_filter | default('') }}" + secretsProvided: + {% if wb_version %} + {% include "mongodb-atlas-auth.yaml" ignore missing %} + {% else %} + - name: atlas_api_key_credentials + workspaceKey: {{ custom.atlas_api_key_credentials | default("atlas_api_key_credentials") }} + {% endif %} + alertConfig: + tasks: + persona: eager-edgar + sessionTTL: 10m diff --git a/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-slx.yaml b/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-slx.yaml new file mode 100644 index 00000000..9a0eee62 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-slx.yaml @@ -0,0 +1,29 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://cdn.simpleicons.org/mongodb/47A248 + alias: MongoDB Atlas Operations Health — {{ match_resource.name | default(match_resource.qualified_name) }} + asMeasuredBy: Aggregate SLI from Atlas alerts, dedicated-cluster backup flags, and project IP access posture APIs. 
+ configProvided: + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER + owners: + - {{workspace.owner_email}} + statement: Atlas project alerting, backup continuity, and network allowlists should stay within organizational guardrails. + additionalContext: + qualified_name: "{{ match_resource.qualified_name }}" + tags: + - name: cloud + value: mongodb_atlas + - name: service + value: atlas + - name: scope + value: project + - name: access + value: read-only diff --git a/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-taskset.yaml b/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-taskset.yaml new file mode 100644 index 00000000..6ff7c148 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/.runwhen/templates/mongodb-atlas-operations-health-taskset.yaml @@ -0,0 +1,39 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + description: Evaluates MongoDB Atlas project operations posture (alerts, backups, IP access) via the Admin API. 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/mongodb-atlas-operations-health/runbook.robot + configProvided: + - name: ATLAS_PROJECT_ID + value: "{{ match_resource.resource.project_id | default(match_resource.id) }}" + - name: ATLAS_ORG_ID + value: "{{ match_resource.resource.organization_id | default('') }}" + - name: CLUSTER_FILTER + value: "{{ custom.atlas_cluster_filter | default('') }}" + - name: ALERT_LOOKBACK_HOURS + value: "{{ custom.atlas_alert_lookback_hours | default('24') }}" + secretsProvided: + {% if wb_version %} + {% include "mongodb-atlas-auth.yaml" ignore missing %} + {% else %} + - name: atlas_api_key_credentials + workspaceKey: {{ custom.atlas_api_key_credentials | default("atlas_api_key_credentials") }} + {% endif %} diff --git a/codebundles/mongodb-atlas-operations-health/.test/README.md b/codebundles/mongodb-atlas-operations-health/.test/README.md new file mode 100644 index 00000000..15ed6a8d --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/.test/README.md @@ -0,0 +1,27 @@ +# Testing mongodb-atlas-operations-health + +This bundle talks to the live MongoDB Atlas Admin API. Automated tests in CI are limited to shell syntax checks so we never embed real Atlas API keys. + +## Prerequisites + +- MongoDB Atlas project with a Project Read Only (or higher) API key +- `ATLAS_PROJECT_ID` and credentials matching the workspace secret format documented in the bundle `README.md` + +## Local validation + +From `.test/`: + +```bash +task validate-scripts +``` + +## Manual integration + +Export `ATLAS_PROJECT_ID` and either `ATLAS_PUBLIC_API_KEY` / `ATLAS_PRIVATE_API_KEY` or `ATLAS_API_KEY_CREDENTIALS` JSON, then run individual scripts from the bundle root, for example: + +```bash +cd .. 
+./check-atlas-open-alerts.sh +``` + +Expect `atlas_*_issues.json` files and human-readable stdout suitable for Robot `Add Pre To Report`. diff --git a/codebundles/mongodb-atlas-operations-health/.test/Taskfile.yaml b/codebundles/mongodb-atlas-operations-health/.test/Taskfile.yaml new file mode 100644 index 00000000..81d8fbe3 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/.test/Taskfile.yaml @@ -0,0 +1,12 @@ +version: "3" + +tasks: + default: + desc: "Syntax-check Atlas helper and task scripts (no live API calls)" + cmds: + - task: validate-scripts + + validate-scripts: + desc: "Run bash -n on bundle shell scripts" + cmds: + - bash ./validate-bundle-scripts.sh # Task runs cmds from this Taskfile's directory (.test/), so the path is relative to it diff --git a/codebundles/mongodb-atlas-operations-health/.test/validate-bundle-scripts.sh b/codebundles/mongodb-atlas-operations-health/.test/validate-bundle-scripts.sh new file mode 100755 index 00000000..681ff1d1 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/.test/validate-bundle-scripts.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail +# Static validation only — MongoDB Atlas test projects require customer-provided org/project credentials. +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +failed=0 +while IFS= read -r -d '' f; do + if ! bash -n "$f"; then + echo "bash -n failed: $f" >&2 + failed=1 + fi +done < <(find "$ROOT" -maxdepth 1 -name '*.sh' -print0) +exit "$failed" diff --git a/codebundles/mongodb-atlas-operations-health/README.md b/codebundles/mongodb-atlas-operations-health/README.md new file mode 100644 index 00000000..4366738c --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/README.md @@ -0,0 +1,46 @@ +# MongoDB Atlas Operations Health + +This CodeBundle performs read-only checks against the MongoDB Atlas Admin API for a single project: live alert posture, whether dedicated clusters report cloud backup enabled, and whether the project IP access list shows risky openness or inconsistencies with public cluster DNS names. 
+ +## Overview + +- **Alert posture**: Surfaces OPEN and TRACKING alerts (and CLOSED items whose timestamps fall inside `ALERT_LOOKBACK_HOURS` when date parsing works), scoped by optional `CLUSTER_FILTER`, with a short blast-radius summary. +- **Backup coverage**: For `REPLICA_SET`, `SHARDED`, and `GEOSHARDED` clusters, validates `backupEnabled` / `providerBackupEnabled` and records when the cloud backup schedule endpoint is unavailable on lower tiers (downgraded context instead of hard failure). +- **Network access**: Flags `0.0.0.0/0`, very broad `0.0.0.0/N` prefixes, and an empty allowlist when in-scope clusters still publish `connectionStrings.standardSrv` hostnames. +- **SLI**: Averages three binary dimensions into a 0–1 operations health score for periodic monitoring. + +API reference: [Atlas Admin API v2](https://www.mongodb.com/docs/api/doc/atlas-admin-api-v2/). + +## Configuration + +### Required variables + +- `ATLAS_PROJECT_ID`: 24-hex MongoDB Atlas project (group) id used in `/groups/{groupId}/...` paths. + +### Optional variables + +- `ATLAS_ORG_ID`: Organization id for workspace context (reserved for future org-level checks). +- `CLUSTER_FILTER`: Comma-separated Atlas cluster names to limit alert, backup, and network correlation (default: empty, meaning all clusters in the project). +- `ALERT_LOOKBACK_HOURS`: Hours of history for treating recently CLOSED alerts as relevant in the deep-dive runbook task (default: `24`). + +### Secrets + +- `atlas_api_key_credentials`: Programmatic API key material — preferred shape is JSON `{"ATLAS_PUBLIC_API_KEY":"...","ATLAS_PRIVATE_API_KEY":"..."}` (aliases `publicKey` / `privateKey` are also accepted). Plain multi-line `KEY=value` text works as well. The RunWhen platform injects this for digest authentication to `https://cloud.mongodb.com/api/atlas/v2`. 
+ +## Tasks overview + +### Check MongoDB Atlas Open Alerts for Project + +Paginates `GET /groups/{groupId}/alerts`, applies the cluster scope filter, evaluates OPEN/TRACKING and recent CLOSED signals, and raises a consolidated issue when anything relevant is found. + +### Verify MongoDB Atlas Backup Configuration for Project + +Lists clusters, checks backup flags on dedicated layouts, and probes `GET .../backup/schedule` for extra context when the tier supports it. + +### Review MongoDB Atlas Network Access for Project + +Reads `GET /groups/{groupId}/accessList`, warns on open CIDR patterns, and combines cluster connection string hints to detect empty allowlists paired with public SRV endpoints. + +### SLI (sli.robot) + +Runs lightweight variants of the three checks and publishes sub-metrics `atlas_alerts_clear`, `atlas_backup_ok`, and `atlas_network_ok` plus the aggregate health score. diff --git a/codebundles/mongodb-atlas-operations-health/atlas-helpers.sh b/codebundles/mongodb-atlas-operations-health/atlas-helpers.sh new file mode 100755 index 00000000..b6635ea6 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/atlas-helpers.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# Shared Atlas Admin API helpers (digest auth, credential parsing). +# shellcheck shell=bash + +ATLAS_API_ROOT="${ATLAS_API_ROOT:-https://cloud.mongodb.com/api/atlas/v2}" +ATLAS_ACCEPT="${ATLAS_ACCEPT:-application/vnd.atlas.2024-08-05+json}" + +atlas_resolve_credentials() { + if [[ -n "${ATLAS_PUBLIC_API_KEY:-}" && -n "${ATLAS_PRIVATE_API_KEY:-}" ]]; then + return 0 + fi + + local raw="" + raw="${ATLAS_API_KEY_CREDENTIALS:-}" + if [[ -z "$raw" ]]; then + raw="${atlas_api_key_credentials:-}" + fi + if [[ -z "$raw" ]]; then + echo "atlas-helpers: set ATLAS_PUBLIC_API_KEY and ATLAS_PRIVATE_API_KEY, or provide atlas_api_key_credentials / ATLAS_API_KEY_CREDENTIALS (JSON or KEY=value lines)." >&2 + return 1 + fi + + if echo "$raw" | jq -e . 
>/dev/null 2>&1; then + ATLAS_PUBLIC_API_KEY="$(echo "$raw" | jq -r '.ATLAS_PUBLIC_API_KEY // .publicKey // .username // empty')" + ATLAS_PRIVATE_API_KEY="$(echo "$raw" | jq -r '.ATLAS_PRIVATE_API_KEY // .privateKey // .password // empty')" + else + while IFS= read -r line || [[ -n "$line" ]]; do + [[ "$line" =~ ^[[:space:]]*# ]] && continue + [[ -z "${line// /}" ]] && continue + if [[ "$line" =~ ^ATLAS_PUBLIC_API_KEY[[:space:]]*=[[:space:]]*(.*)$ ]]; then + ATLAS_PUBLIC_API_KEY="${BASH_REMATCH[1]}" + ATLAS_PUBLIC_API_KEY="${ATLAS_PUBLIC_API_KEY%\"}" + ATLAS_PUBLIC_API_KEY="${ATLAS_PUBLIC_API_KEY#\"}" + fi + if [[ "$line" =~ ^ATLAS_PRIVATE_API_KEY[[:space:]]*=[[:space:]]*(.*)$ ]]; then + ATLAS_PRIVATE_API_KEY="${BASH_REMATCH[1]}" + ATLAS_PRIVATE_API_KEY="${ATLAS_PRIVATE_API_KEY%\"}" + ATLAS_PRIVATE_API_KEY="${ATLAS_PRIVATE_API_KEY#\"}" + fi + done <<< "$raw" + fi + + if [[ -z "${ATLAS_PUBLIC_API_KEY:-}" || -z "${ATLAS_PRIVATE_API_KEY:-}" ]]; then + echo "atlas-helpers: could not parse Atlas API keys from credentials payload." 
>&2 + return 1 + fi + return 0 +} + +atlas_get() { + local path_qs="$1" + local outf code + outf="$(mktemp)" + code="$( + curl -sS -o "$outf" -w "%{http_code}" \ + --digest \ + -u "${ATLAS_PUBLIC_API_KEY}:${ATLAS_PRIVATE_API_KEY}" \ + -H "Accept: ${ATLAS_ACCEPT}" \ + "${ATLAS_API_ROOT%/}/${path_qs#\/}" 2>/dev/null || true + )" + ATLAS_LAST_HTTP_CODE="${code:-000}" # curl -w already prints 000 on transport failure; default only covers the case where it printed nothing + ATLAS_LAST_BODY="$(cat "$outf" 2>/dev/null || true)" + rm -f "$outf" +} + +cluster_matches_filter() { + local cname="$1" + local filt="${CLUSTER_FILTER:-}" + filt="${filt//[[:space:]]/}" + if [[ -z "$filt" ]]; then + return 0 + fi + local IFS=','; local tok + for tok in $filt; do + [[ -z "$tok" ]] && continue + if [[ "$cname" == "$tok" ]]; then + return 0 + fi + done + return 1 +} diff --git a/codebundles/mongodb-atlas-operations-health/check-atlas-open-alerts.sh b/codebundles/mongodb-atlas-operations-health/check-atlas-open-alerts.sh new file mode 100755 index 00000000..d7d6096f --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/check-atlas-open-alerts.sh @@ -0,0 +1,136 @@ +#!/usr/bin/env bash +set -euo pipefail +# xtrace (set -x) deliberately left off: it would echo the Atlas API key pair and curl -u arguments to stderr. + +# Queries OPEN / TRACKING Atlas project alerts (and recently updated CLOSED within lookback) +# when timestamps are present. Writes JSON issues to atlas_open_alerts_issues.json + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=./atlas-helpers.sh +source "${SCRIPT_DIR}/atlas-helpers.sh" + +: "${ATLAS_PROJECT_ID:?Must set ATLAS_PROJECT_ID}" +OUTPUT_FILE="${OUTPUT_FILE:-atlas_open_alerts_issues.json}" +ALERT_LOOKBACK_HOURS="${ALERT_LOOKBACK_HOURS:-24}" + +issues_json='[]' + +if ! atlas_resolve_credentials; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot Authenticate to MongoDB Atlas API for Project \`${ATLAS_PROJECT_ID}\`" \ + --arg details "Missing or unparsable Atlas API credentials." 
\ + --arg severity "4" \ + --arg next_steps "Configure workspace secret atlas_api_key_credentials (JSON with ATLAS_PUBLIC_API_KEY and ATLAS_PRIVATE_API_KEY) or set keys in the environment." \ + '. += [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]') + echo "$issues_json" >"$OUTPUT_FILE" + echo "Atlas credentials missing." + exit 0 +fi + +acc='[]' +page=1 +while true; do + atlas_get "groups/${ATLAS_PROJECT_ID}/alerts?itemsPerPage=100&pageNum=${page}&includeCount=true" + if [[ "$ATLAS_LAST_HTTP_CODE" != "200" ]]; then + err="$(echo "$ATLAS_LAST_BODY" | jq -r '.detail // .reason // .error // "HTTP '"$ATLAS_LAST_HTTP_CODE"'"' 2>/dev/null || echo "HTTP $ATLAS_LAST_HTTP_CODE")" + issues_json=$(echo "$issues_json" | jq \ + --arg title "Atlas Alerts API Error for Project \`${ATLAS_PROJECT_ID}\`" \ + --arg details "GET alerts failed: ${err}" \ + --arg severity "4" \ + --arg next_steps "Verify ATLAS_PROJECT_ID, API key roles (Project Read Only+), and network access to cloud.mongodb.com." \ + '. += [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]') + echo "$issues_json" >"$OUTPUT_FILE" + echo "Alerts API error: $err" + exit 0 + fi + chunk="$(echo "$ATLAS_LAST_BODY" | jq -c '.results // []')" + len="$(echo "$chunk" | jq 'length')" + acc="$(echo "$acc" "$chunk" | jq -s 'add')" + if [[ "$len" -lt 100 ]]; then + break + fi + page=$((page + 1)) + if [[ "$page" -gt 50 ]]; then + echo "Stopped pagination at page 50 (safety cap)." + break + fi +done + +lookback_sec=$((ALERT_LOOKBACK_HOURS * 3600)) +now_ts="$(date +%s)" + +is_recent_ts() { + local ts="$1" + [[ -z "$ts" || "$ts" == "null" ]] && return 1 + local alert_ts + if ! 
alert_ts="$(date -d "$ts" +%s 2>/dev/null)"; then + return 1 + fi + [[ $((now_ts - alert_ts)) -le $lookback_sec ]] +} + +critical_heuristic() { + local t="$1" + echo "$t" | grep -qiE 'DOWN|UNREACHABLE|FAIL|PRIMARY|NO_PRIMARY|INACTIVE|OUTAGE' && return 0 + return 1 +} + +declare -a interesting_alerts=() +declare -a clusters_hit=() + +while IFS= read -r row; do + [[ -z "$row" ]] && continue + cname="$(echo "$row" | jq -r '.clusterName // .clusterId // ""')" + if [[ -n "$cname" ]] && ! cluster_matches_filter "$cname"; then + continue + fi + st="$(echo "$row" | jq -r '.status // ""')" + typ="$(echo "$row" | jq -r '.typeName // .eventTypeName // .metricName // "alert"')" + hum="$(echo "$row" | jq -r '.humanReadable // .message // ""' | head -c 500 | tr '|' ' ')" + updated="$(echo "$row" | jq -r '.updated // .lastNotified // .created // ""')" + + include=0 + if [[ "$st" == "OPEN" || "$st" == "TRACKING" ]]; then + include=1 + elif [[ "$st" == "CLOSED" ]] && is_recent_ts "$updated"; then + include=1 + fi + + if [[ "$include" -eq 1 ]]; then + interesting_alerts+=("${st}|${typ}|${cname}|${hum}") + if [[ -n "$cname" ]]; then + clusters_hit+=("$cname") + fi + fi +done < <(echo "$acc" | jq -c '.[]') + +uniq_clusters="$(printf '%s\n' "${clusters_hit[@]:-}" | sort -u | paste -sd, -)" +n="${#interesting_alerts[@]}" + +summary_lines="$(printf '%s\n' "${interesting_alerts[@]:-}" | head -25)" +blast_radius="$(printf '%s\n' "${clusters_hit[@]:-}" | sort -u | grep -cve '^$' || true)" + +echo "Open/recent alerts in scope: ${n} (distinct clusters in blast radius: ${blast_radius})" +echo "${summary_lines}" + +if [[ "$n" -gt 0 ]]; then + max_sev=2 + for line in "${interesting_alerts[@]}"; do + typ="$(echo "$line" | cut -d'|' -f2)" + if critical_heuristic "$typ"; then + max_sev=4 + break + fi + done + det="count=${n}; clusters=${uniq_clusters:-n/a}; sample=(first 25 lines):"$'\n'"${summary_lines}" + issues_json=$(echo "$issues_json" | jq \ + --arg title "MongoDB Atlas Alerts Require 
Attention in Project \`${ATLAS_PROJECT_ID}\`" \ + --arg details "$det" \ + --argjson severity "$max_sev" \ + --arg next_steps "Triage OPEN/TRACKING (and recent CLOSED) items in Atlas UI Alerts tab; correlate with clusterName; follow Atlas alert type runbooks." \ + '. += [{ "title": $title, "details": $details, "severity": $severity, "next_steps": $next_steps }]') +fi + +echo "$issues_json" | jq . >"$OUTPUT_FILE" +echo "Wrote $OUTPUT_FILE" +exit 0 diff --git a/codebundles/mongodb-atlas-operations-health/review-atlas-network-access.sh b/codebundles/mongodb-atlas-operations-health/review-atlas-network-access.sh new file mode 100755 index 00000000..c1295041 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/review-atlas-network-access.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +set -euo pipefail +# xtrace (set -x) deliberately left off: it would echo the Atlas API key pair and curl -u arguments to stderr. + +# Audits project IP access list for overly permissive CIDRs and empty lists when +# clusters advertise public SRV connection strings. Writes atlas_network_issues.json + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=./atlas-helpers.sh +source "${SCRIPT_DIR}/atlas-helpers.sh" + +: "${ATLAS_PROJECT_ID:?Must set ATLAS_PROJECT_ID}" +OUTPUT_FILE="${OUTPUT_FILE:-atlas_network_issues.json}" + +issues_json='[]' + +if ! atlas_resolve_credentials; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot Authenticate to MongoDB Atlas API for Project \`${ATLAS_PROJECT_ID}\`" \ + --arg details "Missing or unparsable Atlas API credentials." \ + --arg severity "4" \ + --arg next_steps "Configure atlas_api_key_credentials or API key env vars." \ + '. 
+= [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]') + echo "$issues_json" >"$OUTPUT_FILE" + exit 0 +fi + +atlas_get "groups/${ATLAS_PROJECT_ID}/accessList?itemsPerPage=500" +if [[ "$ATLAS_LAST_HTTP_CODE" != "200" ]]; then + err="$(echo "$ATLAS_LAST_BODY" | jq -r '.detail // .reason // .error // empty' 2>/dev/null || true)" # may be empty (blank/non-JSON body or no known key); details below falls back to the HTTP code + issues_json=$(echo "$issues_json" | jq \ + --arg title "Atlas Project IP Access List API Error for \`${ATLAS_PROJECT_ID}\`" \ + --arg details "GET accessList failed: ${err:-HTTP ${ATLAS_LAST_HTTP_CODE}}" \ + --arg severity "3" \ + --arg next_steps "Confirm Atlas Admin API access; some organizations restrict IP access list reads." \ + '. += [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]') + echo "$issues_json" >"$OUTPUT_FILE" + exit 0 +fi + +entries="$(echo "$ATLAS_LAST_BODY" | jq -c '.results // []')" +count="$(echo "$entries" | jq 'length')" +echo "Project IP access list entries: $count" + +while IFS= read -r row; do + [[ -z "$row" ]] && continue + cidr="$(echo "$row" | jq -r '.cidrBlock // empty')" + ip="$(echo "$row" | jq -r '.ipAddress // empty')" + comment="$(echo "$row" | jq -r '.comment // ""')" + target="${cidr:-$ip}" + if [[ "$target" == "0.0.0.0/0" ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Overly Permissive Atlas Network Entry \`${target}\`" \ + --arg details "comment=${comment:-none}; full_entry=$(echo "$row" | jq -c .)" \ + --arg severity "3" \ + --arg next_steps "Replace open CIDR 0.0.0.0/0 with narrow corporate egress IPs or move workloads to private networking / VPC peering." \ + '. 
+= [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]') + fi + if [[ "$cidr" =~ ^0\.0\.0\.0/[0-9]{1,2}$ ]] && [[ "$cidr" != "0.0.0.0/0" ]]; then + wide="${cidr##*/}" + if [[ "$wide" -le 8 ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Broad Atlas Network CIDR \`${cidr}\`" \ + --arg details "comment=${comment:-none}" \ + --arg severity "2" \ + --arg next_steps "Tighten CIDR to minimum required ranges; document temporary exceptions in Atlas entry comments with owners and expiry." \ + '. += [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]') + fi + fi +done < <(echo "$entries" | jq -c '.[]') + +atlas_get "groups/${ATLAS_PROJECT_ID}/clusters?itemsPerPage=500" +clusters_body="$ATLAS_LAST_BODY" +has_public_srv=0 +if [[ "$ATLAS_LAST_HTTP_CODE" == "200" ]]; then + while IFS= read -r row; do + name="$(echo "$row" | jq -r '.name')" + cluster_matches_filter "$name" || continue + srv="$(echo "$row" | jq -r '.connectionStrings.standardSrv // empty')" + if [[ -n "$srv" ]]; then + has_public_srv=1 + fi + done < <(echo "$clusters_body" | jq -c '.results // [] | .[]') +else + echo "Clusters fetch for network correlation failed (HTTP ${ATLAS_LAST_HTTP_CODE}); skipping empty-list heuristic." +fi + +if [[ "$count" -eq 0 && "$has_public_srv" -eq 1 ]]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Empty Atlas IP Access List with Public Cluster SRV Endpoints" \ + --arg details "No project IP allowlist entries but at least one in-scope cluster exposes connectionStrings.standardSrv." \ + --arg severity "2" \ + --arg next_steps "Confirm whether traffic is locked via Private Endpoint / peering only. If clusters are internet-reachable, add least-privilege CIDRs; otherwise document the private-only architecture." \ + '. 
+= [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]') +fi + +echo "$issues_json" | jq . >"$OUTPUT_FILE" +echo "Wrote $OUTPUT_FILE" +exit 0 diff --git a/codebundles/mongodb-atlas-operations-health/runbook.robot b/codebundles/mongodb-atlas-operations-health/runbook.robot new file mode 100644 index 00000000..885c2463 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/runbook.robot @@ -0,0 +1,169 @@ +*** Settings *** +Documentation Read-only MongoDB Atlas project operations posture: open and recent alerts, cloud backup coverage on dedicated clusters, and project IP access patterns that indicate permissive or inconsistent network exposure. +Metadata Author rw-codebundle-agent +Metadata Display Name MongoDB Atlas Operations Health +Metadata Supports mongodb_atlas atlas alerts backup networking project + +Force Tags mongodb_atlas atlas operations backup alerts network health + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite Setup Suite Initialization + +*** Tasks *** +Check MongoDB Atlas Open Alerts for Project `${ATLAS_PROJECT_ID}` + [Documentation] Queries Atlas Admin API alerts for OPEN and TRACKING conditions (plus recent CLOSED when timestamps parse) scoped by CLUSTER_FILTER and summarizes blast radius for in-scope clusters. + [Tags] mongodb_atlas alerts access:read-only data:events + + ${result}= RW.CLI.Run Bash File + ... bash_file=check-atlas-open-alerts.sh + ... env=${env} + ... secret__atlas_api_key_credentials=${atlas_api_key_credentials} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=ATLAS_PROJECT_ID="${ATLAS_PROJECT_ID}" ./check-atlas-open-alerts.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat atlas_open_alerts_issues.json + ... 
timeout_seconds=30 + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for alerts task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=No unresolved or newly reopened Atlas alerts for scoped clusters without operator acknowledgement + ... actual=${issue['details']} + ... title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report Atlas open/recent alerts analysis:\n${result.stdout} + +Verify MongoDB Atlas Backup Configuration for Project `${ATLAS_PROJECT_ID}` + [Documentation] Confirms backupEnabled signals on REPLICA_SET, SHARDED, and GEOSHARDED clusters, treats cloud backup schedule 404 as an unsupported tier hint, and flags clusters lacking backup when the API reports it disabled. + [Tags] mongodb_atlas backup access:read-only data:config + + ${result}= RW.CLI.Run Bash File + ... bash_file=verify-atlas-backup-config.sh + ... env=${env} + ... secret__atlas_api_key_credentials=${atlas_api_key_credentials} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=ATLAS_PROJECT_ID="${ATLAS_PROJECT_ID}" ./verify-atlas-backup-config.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat atlas_backup_issues.json + ... timeout_seconds=30 + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for backup task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Production-typed dedicated clusters should run with Atlas cloud backup / PITR enabled when supported + ... actual=${issue['details']} + ... 
title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report Atlas backup configuration review:\n${result.stdout} + +Review MongoDB Atlas Network Access for Project `${ATLAS_PROJECT_ID}` + [Documentation] Audits project IP access list entries for open CIDRs and correlates an empty list with clusters that still publish public SRV hostnames so risky combinations are visible to operators. + [Tags] mongodb_atlas network access:read-only data:security-config + + ${result}= RW.CLI.Run Bash File + ... bash_file=review-atlas-network-access.sh + ... env=${env} + ... secret__atlas_api_key_credentials=${atlas_api_key_credentials} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=ATLAS_PROJECT_ID="${ATLAS_PROJECT_ID}" ./review-atlas-network-access.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat atlas_network_issues.json + ... timeout_seconds=30 + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for network task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Least-privilege network allowlisting or documented private-only connectivity without contradictory public surface area + ... actual=${issue['details']} + ... title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report Atlas project network access review:\n${result.stdout} + +*** Keywords *** +Suite Initialization + TRY + ${atlas_api_key_credentials}= RW.Core.Import Secret atlas_api_key_credentials + ... type=string + ... 
description=MongoDB Atlas API key pair as JSON or KEY=value text with ATLAS_PUBLIC_API_KEY and ATLAS_PRIVATE_API_KEY + ... pattern=\w* + Set Suite Variable ${atlas_api_key_credentials} ${atlas_api_key_credentials} + EXCEPT + Log atlas_api_key_credentials secret missing; tasks will surface an auth issue. WARN + Set Suite Variable ${atlas_api_key_credentials} ${EMPTY} + END + + ${ATLAS_PROJECT_ID}= RW.Core.Import User Variable ATLAS_PROJECT_ID + ... type=string + ... description=MongoDB Atlas project (group) identifier used in Admin API paths. + ... pattern=\w+ + ${ATLAS_ORG_ID}= RW.Core.Import User Variable ATLAS_ORG_ID + ... type=string + ... description=Optional Atlas organization id for discovery context and future org-level checks. + ... pattern=^[a-fA-F0-9]*$ + ... default= + ${CLUSTER_FILTER}= RW.Core.Import User Variable CLUSTER_FILTER + ... type=string + ... description=Comma-separated cluster names to limit alert, backup, and network correlation scopes. + ... pattern=^[\w ,.-]*$ + ... default= + ${ALERT_LOOKBACK_HOURS}= RW.Core.Import User Variable ALERT_LOOKBACK_HOURS + ... type=string + ... description=Hours of history to consider when treating recently CLOSED alerts as relevant in the deep-dive task. + ... pattern=^\d+$ + ... default=24 + + Set Suite Variable ${ATLAS_PROJECT_ID} ${ATLAS_PROJECT_ID} + Set Suite Variable ${ATLAS_ORG_ID} ${ATLAS_ORG_ID} + Set Suite Variable ${CLUSTER_FILTER} ${CLUSTER_FILTER} + Set Suite Variable ${ALERT_LOOKBACK_HOURS} ${ALERT_LOOKBACK_HOURS} + + ${env}= Create Dictionary + ... ATLAS_PROJECT_ID=${ATLAS_PROJECT_ID} + ... ATLAS_ORG_ID=${ATLAS_ORG_ID} + ... CLUSTER_FILTER=${CLUSTER_FILTER} + ... 
ALERT_LOOKBACK_HOURS=${ALERT_LOOKBACK_HOURS} + Set Suite Variable ${env} ${env} diff --git a/codebundles/mongodb-atlas-operations-health/sli-atlas-alerts-score.sh b/codebundles/mongodb-atlas-operations-health/sli-atlas-alerts-score.sh new file mode 100755 index 00000000..dcfd4e6a --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/sli-atlas-alerts-score.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -euo pipefail + +# SLI dimension: 1 when no OPEN/TRACKING alerts for in-scope clusters (first page only). +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=./atlas-helpers.sh +source "${SCRIPT_DIR}/atlas-helpers.sh" + +: "${ATLAS_PROJECT_ID:?}" +if ! atlas_resolve_credentials; then + jq -n '{score:0,"reason":"no-credentials"}' + exit 0 +fi + +atlas_get "groups/${ATLAS_PROJECT_ID}/alerts?itemsPerPage=100&pageNum=1" +if [[ "$ATLAS_LAST_HTTP_CODE" != "200" ]]; then + jq -n --arg c "$ATLAS_LAST_HTTP_CODE" '{score:0,"reason":("http-"+$c)}' + exit 0 +fi + +open=0 +while IFS= read -r row; do + [[ -z "$row" ]] && continue + st="$(echo "$row" | jq -r '.status // ""')" + cname="$(echo "$row" | jq -r '.clusterName // ""')" + cluster_matches_filter "$cname" || continue + if [[ "$st" == "OPEN" || "$st" == "TRACKING" ]]; then + open=$((open + 1)) + fi +done < <(echo "$ATLAS_LAST_BODY" | jq -c '.results[]?') + +if [[ "$open" -eq 0 ]]; then + jq -n '{score:1}' +else + jq -n --argjson n "$open" '{score:0,"open_tracking":$n}' +fi diff --git a/codebundles/mongodb-atlas-operations-health/sli-atlas-backup-score.sh b/codebundles/mongodb-atlas-operations-health/sli-atlas-backup-score.sh new file mode 100755 index 00000000..979bab44 --- /dev/null +++ b/codebundles/mongodb-atlas-operations-health/sli-atlas-backup-score.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail + +# SLI dimension: 1 when every in-scope dedicated cluster reports backup enabled. 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=./atlas-helpers.sh
+source "${SCRIPT_DIR}/atlas-helpers.sh"
+
+: "${ATLAS_PROJECT_ID:?}"
+if ! atlas_resolve_credentials; then
+  jq -n '{score:0,"reason":"no-credentials"}'
+  exit 0
+fi
+
+atlas_get "groups/${ATLAS_PROJECT_ID}/clusters?itemsPerPage=500"
+if [[ "$ATLAS_LAST_HTTP_CODE" != "200" ]]; then
+  jq -n --arg c "$ATLAS_LAST_HTTP_CODE" '{score:0,"reason":("http-"+$c)}'
+  exit 0
+fi
+
+bad=0
+checked=0
+while IFS= read -r row; do
+  [[ -z "$row" ]] && continue
+  name="$(echo "$row" | jq -r '.name')"
+  cluster_matches_filter "$name" || continue
+  ctype="$(echo "$row" | jq -r '.clusterType // ""')"
+  if [[ "$ctype" == "REPLICA_SET" || "$ctype" == "SHARDED" || "$ctype" == "GEOSHARDED" ]]; then
+    checked=$((checked + 1))
+    backup_on="$(echo "$row" | jq -r '(.backupEnabled // false) or (.providerBackupEnabled // false)')"
+    if [[ "$backup_on" != "true" ]]; then
+      bad=$((bad + 1))
+    fi
+  fi
+done < <(echo "$ATLAS_LAST_BODY" | jq -c '.results[]?')
+
+if [[ "$checked" -eq 0 ]]; then
+  jq -n '{score:1,"note":"no-dedicated-clusters-in-scope"}'
+elif [[ "$bad" -eq 0 ]]; then
+  jq -n '{score:1}'
+else
+  jq -n --argjson bad "$bad" '{score:0,"clusters_without_backup":$bad}'
+fi
diff --git a/codebundles/mongodb-atlas-operations-health/sli-atlas-network-score.sh b/codebundles/mongodb-atlas-operations-health/sli-atlas-network-score.sh
new file mode 100755
index 00000000..25d626fb
--- /dev/null
+++ b/codebundles/mongodb-atlas-operations-health/sli-atlas-network-score.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# SLI dimension: 1 when no 0.0.0.0/0 entry and not empty allowlist with public SRV.
+#
+# Checks run in order; each failure prints {"score":0,"reason":...} and exits 0:
+#   1. credentials resolve and the access-list GET returns 200 ("no-credentials", "http-<code>");
+#   2. no allowlist entry equals 0.0.0.0/0 ("open-cidr");
+#   3. a non-empty allowlist passes here, without listing clusters;
+#   4. for an empty allowlist the clusters GET must return 200 ("http-<code>") and no
+#      in-scope cluster may report connectionStrings.standardSrv ("empty-list-public-srv").
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=./atlas-helpers.sh
+source "${SCRIPT_DIR}/atlas-helpers.sh"
+
+: "${ATLAS_PROJECT_ID:?}"
+if ! atlas_resolve_credentials; then
+  jq -n '{score:0,"reason":"no-credentials"}'
+  exit 0
+fi
+
+atlas_get "groups/${ATLAS_PROJECT_ID}/accessList?itemsPerPage=500"
+if [[ "$ATLAS_LAST_HTTP_CODE" != "200" ]]; then
+  jq -n --arg c "$ATLAS_LAST_HTTP_CODE" '{score:0,"reason":("http-"+$c)}'
+  exit 0
+fi
+
+open_cidr=0
+while IFS= read -r row; do
+  [[ -z "$row" ]] && continue
+  cidr="$(echo "$row" | jq -r '.cidrBlock // empty')"
+  ip="$(echo "$row" | jq -r '.ipAddress // empty')"
+  target="${cidr:-$ip}"
+  if [[ "$target" == "0.0.0.0/0" ]]; then
+    open_cidr=1
+  fi
+done < <(echo "$ATLAS_LAST_BODY" | jq -c '.results[]?')
+
+if [[ "$open_cidr" -ne 0 ]]; then
+  jq -n '{score:0,"reason":"open-cidr"}'
+  exit 0
+fi
+
+# Count the entries now: the next atlas_get call replaces ATLAS_LAST_BODY.
+count="$(echo "$ATLAS_LAST_BODY" | jq '.results|length')"
+if [[ "$count" -ne 0 ]]; then
+  jq -n '{score:1}'
+  exit 0
+fi
+
+# Empty allowlist: only now is the cluster list needed. A failed lookup is reported
+# instead of being read as "no public SRV", which previously produced a passing score.
+atlas_get "groups/${ATLAS_PROJECT_ID}/clusters?itemsPerPage=500"
+if [[ "$ATLAS_LAST_HTTP_CODE" != "200" ]]; then
+  jq -n --arg c "$ATLAS_LAST_HTTP_CODE" '{score:0,"reason":("http-"+$c)}'
+  exit 0
+fi
+
+has_public_srv=0
+while IFS= read -r row; do
+  [[ -z "$row" ]] && continue
+  name="$(echo "$row" | jq -r '.name')"
+  cluster_matches_filter "$name" || continue
+  srv="$(echo "$row" | jq -r '.connectionStrings.standardSrv // empty')"
+  if [[ -n "$srv" ]]; then
+    has_public_srv=1
+  fi
+done < <(echo "$ATLAS_LAST_BODY" | jq -c '.results[]?')
+
+if [[ "$has_public_srv" -eq 1 ]]; then
+  jq -n '{score:0,"reason":"empty-list-public-srv"}'
+  exit 0
+fi
+
+jq -n '{score:1}'
diff --git a/codebundles/mongodb-atlas-operations-health/sli.robot b/codebundles/mongodb-atlas-operations-health/sli.robot
new file mode 100644
index 00000000..5a4439a6
--- /dev/null
+++ b/codebundles/mongodb-atlas-operations-health/sli.robot
@@ -0,0 +1,128 @@
+*** Settings ***
+Documentation    Measures MongoDB Atlas project operations health as the mean of three binary signals — no OPEN/TRACKING alerts in the first alerts page for scoped clusters, backup enabled on scoped dedicated clusters, and no open CIDR or empty-allowlist/public-SRV mismatch — producing a 0–1 score for alerting.
+Metadata    Author    rw-codebundle-agent
+Metadata    Display Name    MongoDB Atlas Operations Health SLI
+Metadata    Supports    mongodb_atlas    atlas    operations    sli
+
+Library    BuiltIn
+Library    RW.Core
+Library    RW.CLI
+Library    RW.platform
+
+Suite Setup    Suite Initialization
+
+*** Keywords ***
+Suite Initialization
+    [Documentation]    Imports the Atlas credential secret (falling back to an empty value with a warning when the import fails), imports ATLAS_PROJECT_ID, ATLAS_ORG_ID and CLUSTER_FILTER, publishes them and the shared env dictionary as suite variables, and initialises the three sub-scores to 0.
+    TRY
+        ${atlas_api_key_credentials}=    RW.Core.Import Secret    atlas_api_key_credentials
+        ...    type=string
+        ...    description=MongoDB Atlas API key pair as JSON or KEY=value text with ATLAS_PUBLIC_API_KEY and ATLAS_PRIVATE_API_KEY
+        ...    pattern=\w*
+        Set Suite Variable    ${atlas_api_key_credentials}    ${atlas_api_key_credentials}
+    EXCEPT
+        Log    atlas_api_key_credentials secret missing; SLI will score 0 dimensions.    WARN
+        Set Suite Variable    ${atlas_api_key_credentials}    ${EMPTY}
+    END
+
+    ${ATLAS_PROJECT_ID}=    RW.Core.Import User Variable    ATLAS_PROJECT_ID
+    ...    type=string
+    ...    description=MongoDB Atlas project (group) identifier.
+    ...    pattern=\w+
+    ${ATLAS_ORG_ID}=    RW.Core.Import User Variable    ATLAS_ORG_ID
+    ...    type=string
+    ...    description=Optional Atlas organization id (reserved for future checks).
+    ...    pattern=^[a-fA-F0-9]*$
+    ...    default=
+    ${CLUSTER_FILTER}=    RW.Core.Import User Variable    CLUSTER_FILTER
+    ...    type=string
+    ...    description=Comma-separated cluster names to scope SLI sampling.
+    ...    pattern=^[\w\s,.-]*$
+    ...    default=
+
+    Set Suite Variable    ${ATLAS_PROJECT_ID}    ${ATLAS_PROJECT_ID}
+    Set Suite Variable    ${ATLAS_ORG_ID}    ${ATLAS_ORG_ID}
+    Set Suite Variable    ${CLUSTER_FILTER}    ${CLUSTER_FILTER}
+
+    ${env}=    Create Dictionary
+    ...    ATLAS_PROJECT_ID=${ATLAS_PROJECT_ID}
+    ...    ATLAS_ORG_ID=${ATLAS_ORG_ID}
+    ...    CLUSTER_FILTER=${CLUSTER_FILTER}
+    Set Suite Variable    ${env}    ${env}
+    Set Suite Variable    ${score_alerts}    0
+    Set Suite Variable    ${score_backup}    0
+    Set Suite Variable    ${score_network}    0
+
+*** Tasks ***
+Score Atlas Open Alert Posture
+    [Documentation]    Binary 1 when the first page of GET alerts shows no OPEN or TRACKING items for clusters matching CLUSTER_FILTER.
+    [Tags]    mongodb_atlas    sli    access:read-only    data:metrics
+
+    ${out}=    RW.CLI.Run Bash File
+    ...    bash_file=sli-atlas-alerts-score.sh
+    ...    env=${env}
+    ...    secret__atlas_api_key_credentials=${atlas_api_key_credentials}
+    ...    include_in_history=false
+    ...    timeout_seconds=45
+    ...    cmd_override=ATLAS_PROJECT_ID="${ATLAS_PROJECT_ID}" ./sli-atlas-alerts-score.sh
+    TRY
+        ${data}=    Evaluate    json.loads($out.stdout)    json
+    EXCEPT
+        Log    SLI alerts JSON parse failed; scoring 0.    WARN
+        ${data}=    Create Dictionary    score=0
+    END
+    ${s}=    Set Variable    ${data.get('score', 0)}
+    Set Suite Variable    ${score_alerts}    ${s}
+    RW.Core.Push Metric    ${s}    sub_name=atlas_alerts_clear
+
+Score Atlas Dedicated Backup Coverage
+    [Documentation]    Binary 1 when every scoped REPLICA_SET, SHARDED, or GEOSHARDED cluster reports backupEnabled or providerBackupEnabled.
+    [Tags]    mongodb_atlas    sli    access:read-only    data:metrics
+
+    ${out}=    RW.CLI.Run Bash File
+    ...    bash_file=sli-atlas-backup-score.sh
+    ...    env=${env}
+    ...    secret__atlas_api_key_credentials=${atlas_api_key_credentials}
+    ...    include_in_history=false
+    ...    timeout_seconds=45
+    ...    cmd_override=ATLAS_PROJECT_ID="${ATLAS_PROJECT_ID}" ./sli-atlas-backup-score.sh
+    TRY
+        ${data}=    Evaluate    json.loads($out.stdout)    json
+    EXCEPT
+        Log    SLI backup JSON parse failed; scoring 0.
WARN
+        ${data}=    Create Dictionary    score=0
+    END
+    ${s}=    Set Variable    ${data.get('score', 0)}
+    Set Suite Variable    ${score_backup}    ${s}
+    RW.Core.Push Metric    ${s}    sub_name=atlas_backup_ok
+
+Score Atlas Project Network Baseline
+    [Documentation]    Binary 1 when no 0.0.0.0/0 allowlist entry exists and the empty-list/public-SRV heuristic from the runbook is not triggered. The script's stdout is parsed as JSON; a parse failure is logged and scored 0.
+    [Tags]    mongodb_atlas    sli    access:read-only    data:metrics
+
+    ${out}=    RW.CLI.Run Bash File
+    ...    bash_file=sli-atlas-network-score.sh
+    ...    env=${env}
+    ...    secret__atlas_api_key_credentials=${atlas_api_key_credentials}
+    ...    include_in_history=false
+    ...    timeout_seconds=45
+    ...    cmd_override=ATLAS_PROJECT_ID="${ATLAS_PROJECT_ID}" ./sli-atlas-network-score.sh
+    TRY
+        ${data}=    Evaluate    json.loads($out.stdout)    json
+    EXCEPT
+        Log    SLI network JSON parse failed; scoring 0.    WARN
+        ${data}=    Create Dictionary    score=0
+    END
+    ${s}=    Set Variable    ${data.get('score', 0)}
+    Set Suite Variable    ${score_network}    ${s}
+    RW.Core.Push Metric    ${s}    sub_name=atlas_network_ok
+
+Generate Aggregate Atlas Operations Health Score
+    [Documentation]    Averages the three binary operations sub-scores into the primary SLI metric.
+    [Tags]    mongodb_atlas    sli    access:read-only    data:metrics
+
+    ${total}=    Evaluate    int(${score_alerts}) + int(${score_backup}) + int(${score_network})
+    ${health_score}=    Evaluate    ${total} / 3.0
+    ${health_score}=    Convert To Number    ${health_score}    2
+    RW.Core.Add To Report    MongoDB Atlas operations health score: ${health_score} (alerts=${score_alerts}, backup=${score_backup}, network=${score_network})
+    RW.Core.Push Metric    ${health_score}
diff --git a/codebundles/mongodb-atlas-operations-health/verify-atlas-backup-config.sh b/codebundles/mongodb-atlas-operations-health/verify-atlas-backup-config.sh
new file mode 100755
index 00000000..eb085fad
--- /dev/null
+++ b/codebundles/mongodb-atlas-operations-health/verify-atlas-backup-config.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# xtrace (set -x) is deliberately left off: it would also trace the sourced
+# atlas-helpers.sh functions that handle the Atlas API key pair and could copy
+# key material into task logs.
+
+# Checks backupEnabled signals on dedicated clusters and gracefully skips tiers where
+# cloud backup schedule APIs return 404. Writes JSON issues to atlas_backup_issues.json
+# (or $OUTPUT_FILE), prints per-cluster notes to stdout, and exits 0 on every handled
+# path so callers read findings from the file.
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=./atlas-helpers.sh
+source "${SCRIPT_DIR}/atlas-helpers.sh"
+
+: "${ATLAS_PROJECT_ID:?Must set ATLAS_PROJECT_ID}"
+OUTPUT_FILE="${OUTPUT_FILE:-atlas_backup_issues.json}"
+
+issues_json='[]'
+notes=()
+
+# Appends one issue object to the issues_json array.
+# Args: $1 title, $2 details, $3 severity (numeric string), $4 next_steps.
+add_issue() {
+  issues_json=$(echo "$issues_json" | jq \
+    --arg title "$1" \
+    --arg details "$2" \
+    --arg severity "$3" \
+    --arg next_steps "$4" \
+    '. += [{ "title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps }]')
+}
+
+if ! atlas_resolve_credentials; then
+  add_issue \
+    "Cannot Authenticate to MongoDB Atlas API for Project \`${ATLAS_PROJECT_ID}\`" \
+    "Missing or unparsable Atlas API credentials." \
+    "4" \
+    "Configure atlas_api_key_credentials or ATLAS_PUBLIC_API_KEY / ATLAS_PRIVATE_API_KEY."
+  echo "$issues_json" >"$OUTPUT_FILE"
+  exit 0
+fi
+
+atlas_get "groups/${ATLAS_PROJECT_ID}/clusters?itemsPerPage=500&includeCount=true"
+if [[ "$ATLAS_LAST_HTTP_CODE" != "200" ]]; then
+  # Prefer the API's own error text; fall back to the HTTP status when the body is not
+  # JSON or has neither .detail nor .reason, so the issue details are never blank.
+  err="$(echo "$ATLAS_LAST_BODY" | jq -r '.detail // .reason // empty' 2>/dev/null || true)"
+  err="${err:-HTTP ${ATLAS_LAST_HTTP_CODE}}"
+  add_issue \
+    "Atlas Clusters API Error for Project \`${ATLAS_PROJECT_ID}\`" \
+    "GET clusters failed: ${err}" \
+    "4" \
+    "Verify project ID and API role; Flex/serverless layouts may need alternate list endpoints."
+  echo "$issues_json" >"$OUTPUT_FILE"
+  exit 0
+fi
+
+clusters_body="$ATLAS_LAST_BODY"
+
+while IFS= read -r row; do
+  [[ -z "$row" ]] && continue
+  name="$(echo "$row" | jq -r '.name')"
+  if ! cluster_matches_filter "$name"; then
+    continue
+  fi
+  ctype="$(echo "$row" | jq -r '.clusterType // ""')"
+  backup_on="$(echo "$row" | jq -r '(.backupEnabled // false) or (.providerBackupEnabled // false)')"
+  srv="$(echo "$row" | jq -r '.connectionStrings.standardSrv // empty')"
+
+  # Try schedule endpoint for extra signal; 404 is expected on unsupported tiers.
+  atlas_get "groups/${ATLAS_PROJECT_ID}/clusters/$(printf '%s' "$name" | jq -sRr @uri)/backup/schedule"
+  if [[ "$ATLAS_LAST_HTTP_CODE" == "200" ]]; then
+    notes+=("cluster=${name}: backup schedule API available")
+  elif [[ "$ATLAS_LAST_HTTP_CODE" == "404" ]]; then
+    notes+=("cluster=${name}: cloud backup schedule API not available for this tier; using cluster.backup fields only")
+  else
+    notes+=("cluster=${name}: backup schedule GET returned HTTP ${ATLAS_LAST_HTTP_CODE} (non-fatal)")
+  fi
+
+  # Ignore types that typically do not expose dedicated backup toggles via this view
+  if [[ "$ctype" == "REPLICA_SET" || "$ctype" == "SHARDED" || "$ctype" == "GEOSHARDED" ]]; then
+    if [[ "$backup_on" != "true" ]]; then
+      add_issue \
+        "Cloud Backup Disabled for Atlas Cluster \`${name}\`" \
+        "clusterType=${ctype}; backupEnabled/providerBackupEnabled=false; standardSrv=${srv:-n/a}" \
+        "4" \
+        "Enable cloud backup / point-in-time recovery for production clusters in Atlas UI or API (https://www.mongodb.com/docs/atlas/backup/)."
+    fi
+  else
+    notes+=("cluster=${name}: clusterType=${ctype} — backup check skipped (non dedicated layout)")
+  fi
+done < <(echo "$clusters_body" | jq -c '.results // [] | .[]')
+
+printf '%s\n' "${notes[@]:-}"
+
+echo "$issues_json" | jq . >"$OUTPUT_FILE"
+echo "Wrote $OUTPUT_FILE"
+exit 0