From f7034ad9125e6cc0b08fe2c38e253baecbf6ecbc Mon Sep 17 00:00:00 2001 From: "rw-codebundle-agent[bot]" Date: Wed, 22 Apr 2026 23:17:08 +0000 Subject: [PATCH] Add azure-cosmosdb-config-health CodeBundle Implements design spec for read-only Cosmos DB configuration checks: Resource Health, API/consistency, backup, network firewall, private endpoints, diagnostic settings, and activity log surfacing. Includes SLI with aggregated 0-1 score, generation rules for azure_cosmosdb_database_account, and RunWhen templates. Related: runwhen-contrib/rw-cli-codecollection issue #104. Made-with: Cursor --- .../azure-cosmosdb-config-health.yaml | 22 ++ .../azure-cosmosdb-config-health-sli.yaml | 51 +++ .../azure-cosmosdb-config-health-slx.yaml | 35 +++ .../azure-cosmosdb-config-health-taskset.yaml | 41 +++ .../.test/Taskfile.yaml | 12 + .../.test/validate-bundle-structure.sh | 24 ++ .../azure-cosmosdb-config-health/README.md | 65 ++++ .../cosmosdb-activity-changes.sh | 57 ++++ .../cosmosdb-api-consistency-config.sh | 65 ++++ .../cosmosdb-backup-policy.sh | 62 ++++ .../cosmosdb-diagnostic-settings.sh | 47 +++ .../cosmosdb-network-firewall.sh | 60 ++++ .../cosmosdb-private-endpoints.sh | 62 ++++ .../cosmosdb-resource-health.sh | 91 ++++++ .../cosmosdb-sli-dimensions.sh | 146 +++++++++ .../cosmosdb_common.sh | 30 ++ .../runbook.robot | 292 ++++++++++++++++++ .../azure-cosmosdb-config-health/sli.robot | 104 +++++++ 18 files changed, 1266 insertions(+) create mode 100644 codebundles/azure-cosmosdb-config-health/.runwhen/generation-rules/azure-cosmosdb-config-health.yaml create mode 100644 codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-sli.yaml create mode 100644 codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-slx.yaml create mode 100644 codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-taskset.yaml create mode 100644 
codebundles/azure-cosmosdb-config-health/.test/Taskfile.yaml create mode 100755 codebundles/azure-cosmosdb-config-health/.test/validate-bundle-structure.sh create mode 100644 codebundles/azure-cosmosdb-config-health/README.md create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-activity-changes.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-api-consistency-config.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-backup-policy.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-diagnostic-settings.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-network-firewall.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-private-endpoints.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-resource-health.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb-sli-dimensions.sh create mode 100755 codebundles/azure-cosmosdb-config-health/cosmosdb_common.sh create mode 100644 codebundles/azure-cosmosdb-config-health/runbook.robot create mode 100644 codebundles/azure-cosmosdb-config-health/sli.robot diff --git a/codebundles/azure-cosmosdb-config-health/.runwhen/generation-rules/azure-cosmosdb-config-health.yaml b/codebundles/azure-cosmosdb-config-health/.runwhen/generation-rules/azure-cosmosdb-config-health.yaml new file mode 100644 index 00000000..d00e77b5 --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/.runwhen/generation-rules/azure-cosmosdb-config-health.yaml @@ -0,0 +1,22 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + platform: azure + generationRules: + - resourceTypes: + - azure_cosmosdb_database_account + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: + - baseName: az-cosmosdb-config-health + qualifiers: ["resource_group", "subscription_id"] + baseTemplateName: azure-cosmosdb-config-health + levelOfDetail: basic + outputItems: + - type: 
slx + - type: sli + - type: runbook + templateName: azure-cosmosdb-config-health-taskset.yaml diff --git a/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-sli.yaml b/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-sli.yaml new file mode 100644 index 00000000..c2272f3f --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-sli.yaml @@ -0,0 +1,51 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: Health Score + displayUnitsShort: score + locations: + - {{default_location}} + description: >- + 0-1 configuration health score for Azure Cosmos DB account {{ match_resource.resource.name }} + in {{ resource_group.name }}. + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-cosmosdb-config-health/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 300 + configProvided: + - name: AZ_SUBSCRIPTION + value: "{{ subscription_id }}" + - name: AZURE_RESOURCE_GROUP + value: "{{ resource_group.name }}" + - name: COSMOSDB_ACCOUNT_NAME + value: "{{ match_resource.resource.name }}" + - name: ACTIVITY_LOG_LOOKBACK_HOURS + value: "168" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} + + alertConfig: + tasks: + persona: eager-edgar + sessionTTL: 10m diff --git a/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-slx.yaml 
b/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-slx.yaml new file mode 100644 index 00000000..dca735d6 --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-slx.yaml @@ -0,0 +1,35 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{ slx_name }} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/azure/databases/10137-icon-service-Azure-Cosmos-DB.svg + alias: >- + {{ match_resource.resource_group.name }} Cosmos DB {{ match_resource.resource.name }} Configuration Health + asMeasuredBy: Composite configuration score (Resource Health, API, backup, network, private link, diagnostics, activity). + configProvided: + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER + owners: + - {{ workspace.owner_email }} + statement: >- + Monitor Azure Cosmos DB account configuration for availability signals, security-related settings, + backup posture, network exposure, private endpoints, diagnostic exports, and recent administrative changes + in resource group {{ match_resource.resource_group.name }}. 
+ additionalContext: + {% include "azure-hierarchy.yaml" ignore missing %} + qualified_name: "{{ match_resource.qualified_name }}" + tags: + {% include "azure-tags.yaml" ignore missing %} + - name: cloud + value: azure + - name: service + value: cosmosdb + - name: scope + value: resource-group + - name: access + value: read-only diff --git a/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-taskset.yaml b/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-taskset.yaml new file mode 100644 index 00000000..1614e934 --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/.runwhen/templates/azure-cosmosdb-config-health-taskset.yaml @@ -0,0 +1,41 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + description: >- + Validates Azure Cosmos DB configuration health for account {{ match_resource.resource.name }} + in resource group {{ resource_group.name }} (subscription {{ subscription_name }}). 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-cosmosdb-config-health/runbook.robot + configProvided: + - name: AZ_SUBSCRIPTION + value: "{{ subscription_id }}" + - name: AZURE_RESOURCE_GROUP + value: "{{ resource_group.name }}" + - name: COSMOSDB_ACCOUNT_NAME + value: "{{ match_resource.resource.name }}" + - name: ACTIVITY_LOG_LOOKBACK_HOURS + value: "168" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} diff --git a/codebundles/azure-cosmosdb-config-health/.test/Taskfile.yaml b/codebundles/azure-cosmosdb-config-health/.test/Taskfile.yaml new file mode 100644 index 00000000..dc7ac702 --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/.test/Taskfile.yaml @@ -0,0 +1,12 @@ +version: "3" + +tasks: + default: + desc: "Validate CodeBundle structure" + cmds: + - ./validate-bundle-structure.sh + + clean: + desc: "Remove local validator temp files" + cmds: + - rm -f .validator-tmp.* diff --git a/codebundles/azure-cosmosdb-config-health/.test/validate-bundle-structure.sh b/codebundles/azure-cosmosdb-config-health/.test/validate-bundle-structure.sh new file mode 100755 index 00000000..fb2e679b --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/.test/validate-bundle-structure.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT" +fail=0 +need() { + if [[ ! 
-e "$1" ]]; then + echo "missing: $1" >&2 + fail=1 + fi +} +need runbook.robot +need sli.robot +need README.md +need .runwhen/generation-rules/azure-cosmosdb-config-health.yaml +need .runwhen/templates/azure-cosmosdb-config-health-slx.yaml +need .runwhen/templates/azure-cosmosdb-config-health-taskset.yaml +need .runwhen/templates/azure-cosmosdb-config-health-sli.yaml +for s in cosmosdb-resource-health.sh cosmosdb-api-consistency-config.sh cosmosdb-backup-policy.sh \ + cosmosdb-network-firewall.sh cosmosdb-private-endpoints.sh cosmosdb-diagnostic-settings.sh \ + cosmosdb-activity-changes.sh cosmosdb-sli-dimensions.sh cosmosdb_common.sh; do + need "$s" +done +exit "$fail" diff --git a/codebundles/azure-cosmosdb-config-health/README.md b/codebundles/azure-cosmosdb-config-health/README.md new file mode 100644 index 00000000..19b519da --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/README.md @@ -0,0 +1,65 @@ +# Azure Cosmos DB Configuration Health + +This CodeBundle validates Azure Cosmos DB account settings that affect availability, security, recoverability, and operability. It uses read-only Azure Resource Manager, Resource Health, monitoring, and activity log APIs—aligned with **Reader**-style access plus diagnostic read permissions where required. + +## Overview + +- **Resource Health**: Detects Azure platform or account-level availability states that are not `Available`. +- **API and consistency**: Flags `Eventual` default consistency, inconsistent multi-region write configuration, and accounts that still allow key-based metadata writes. +- **Backup**: Ensures a supported backup mode (periodic with reasonable retention or continuous). +- **Public network and firewall**: Surfaces wide-open public access patterns and `0.0.0.0` firewall rules. +- **Private endpoints**: When public access is disabled, verifies private endpoints exist and are `Approved`. +- **Diagnostic settings**: Confirms at least one diagnostic setting exports telemetry. 
+- **Activity log**: Summarizes recent administrative events for the account to support change correlation. + +Pair with utilization-focused bundles (for example RU and latency metrics) for a full operational picture. + +## Configuration + +### Required variables + +- `AZ_SUBSCRIPTION`: Azure subscription ID (UUID) for the Cosmos DB account. +- `AZURE_RESOURCE_GROUP`: Resource group that contains the account (or accounts when scanning `All`). + +### Optional variables + +- `COSMOSDB_ACCOUNT_NAME`: Cosmos DB account name. Set to `All` (default) to evaluate every account in the resource group. +- `ACTIVITY_LOG_LOOKBACK_HOURS`: Hours of activity log history to scan for administrative events (default: `168`). + +### Secrets + +- `azure_credentials`: Service principal or workspace secret in the format expected by your Azure CLI login flow (commonly `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`, and subscription context). Match the pattern used by other Azure CLI CodeBundles in the workspace. + +## Tasks overview + +### Check Cosmos DB Resource Health + +Queries `Microsoft.ResourceHealth/availabilityStatuses/current` for each scoped account and raises issues when the reported title is not `Available`. + +### Check Cosmos DB API and Consistency Configuration + +Reads `az cosmosdb show` output to evaluate default consistency, multi-region write flags versus region count, and metadata write protection. + +### Check Cosmos DB Backup and Point-in-Time Settings + +Validates periodic backup retention (minimum eight hours) or continuous backup mode. + +### Check Cosmos DB Public Network Access and Firewall Rules + +Detects public network exposure without compensating IP or private-link controls and flags `0.0.0.0` rules. + +### Check Cosmos DB Private Endpoint Configuration + +When public access is disabled, requires private endpoints in an `Approved` state. 
+ +### Check Cosmos DB Diagnostic Settings + +Lists Azure Monitor diagnostic settings for the account resource ID and flags a missing configuration. + +### Check Cosmos DB Activity Log for Recent Configuration Changes + +Lists recent administrative activity log events scoped to the account for change awareness. + +## Service Level Indicator + +`sli.robot` emits a 0–1 score averaged across seven configuration dimensions using `cosmosdb-sli-dimensions.sh`. Sub-metrics are published per dimension for dashboard drill-down. diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-activity-changes.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-activity-changes.sh new file mode 100755 index 00000000..5e1a9a80 --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-activity-changes.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# Recent Administrative activity log events for the account (config mutations). 
+# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=cosmosdb_common.sh +source "${SCRIPT_DIR}/cosmosdb_common.sh" + +: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}" + +OUTPUT_FILE="cosmosdb_activity_issues.json" +echo '[]' > "$OUTPUT_FILE" + +LOOKBACK_HOURS="${ACTIVITY_LOG_LOOKBACK_HOURS:-168}" +# az monitor activity-log --offset expects formats like 168h +OFFSET="${LOOKBACK_HOURS}h" + +subscription="$(cosmosdb_resolve_subscription)" +[[ -z "$subscription" ]] && { cat "$OUTPUT_FILE"; exit 0; } # early exits still print the issue array, like the no-accounts exit below +az account set --subscription "$subscription" 2>/dev/null || { cat "$OUTPUT_FILE"; exit 0; } + +COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}" +mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER") +[[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]] && { echo '[]' > "$OUTPUT_FILE"; cat "$OUTPUT_FILE"; exit 0; } + +for acct in "${accounts[@]}"; do + [[ -z "$acct" ]] && continue + rid="$(cosmosdb_account_resource_id "$subscription" "$AZURE_RESOURCE_GROUP" "$acct")" + if ! log_json=$(az monitor activity-log list --resource-id "$rid" --offset "$OFFSET" --max-events 100 --subscription "$subscription" -o json 2>/dev/null); then + jq --arg t "Cannot read activity log for \`${acct}\`" \ + --arg d "az monitor activity-log list failed." \ + --argjson s 3 \ + --arg n "Verify Reader access on the subscription and resource." \ + '. 
+= [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + continue + fi + + admin_events=$(echo "$log_json" | jq '[.[] | select( + .category as $c | if ($c | type) == "object" then ($c.value == "Administrative" or $c.localizedValue == "Administrative") else $c == "Administrative" end + )]') # category may be an object or a bare string; indexing a string with .value would make jq fail + count=$(echo "$admin_events" | jq 'length') + if [[ "${count:-0}" -eq 0 ]]; then + continue + fi + + sample=$(echo "$admin_events" | jq -c '[.[:15][] | {time: .eventTimestamp, op: .operationName.localizedValue, status: .status.localizedValue}]') + jq --arg t "Cosmos DB \`${acct}\` has ${count} administrative activity log events in last ${LOOKBACK_HOURS}h" \ + --argjson d "$sample" \ + --argjson s 3 \ + --arg n "Review who changed throughput, failover, networking, or backup; correlate with incidents or change windows." \ + '. += [{title: $t, details: ($d | tostring), severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" +done + +cat "$OUTPUT_FILE" diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-api-consistency-config.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-api-consistency-config.sh new file mode 100755 index 00000000..fb19be1d --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-api-consistency-config.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# API / consistency / capability flags vs common production baselines. 
+# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=cosmosdb_common.sh +source "${SCRIPT_DIR}/cosmosdb_common.sh" + +: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}" + +OUTPUT_FILE="cosmosdb_api_consistency_issues.json" +echo '[]' > "$OUTPUT_FILE" + +subscription="$(cosmosdb_resolve_subscription)" +[[ -z "$subscription" ]] && { cat "$OUTPUT_FILE"; exit 0; } # early exits still print the issue array, like the no-accounts exit below +az account set --subscription "$subscription" 2>/dev/null || { cat "$OUTPUT_FILE"; exit 0; } + +COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}" +mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER") +[[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]] && { echo '[]' > "$OUTPUT_FILE"; cat "$OUTPUT_FILE"; exit 0; } + +for acct in "${accounts[@]}"; do + [[ -z "$acct" ]] && continue + if ! detail=$(az cosmosdb show -g "$AZURE_RESOURCE_GROUP" -n "$acct" --subscription "$subscription" -o json 2>/dev/null); then + jq --arg t "Cannot read Cosmos DB account \`${acct}\`" \ + --arg d "az cosmosdb show failed." \ + --argjson s 3 \ + --arg n "Verify RBAC Reader on the account." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + continue + fi + + defcons=$(echo "$detail" | jq -r '.consistencyPolicy.defaultConsistencyLevel // ""') + if [[ "$defcons" == "Eventual" ]]; then + jq --arg t "Cosmos DB \`${acct}\` uses Eventual default consistency" \ + --arg d "defaultConsistencyLevel=${defcons}. Many production workloads expect Session or stronger guarantees." \ + --argjson s 2 \ + --arg n "Re-evaluate consistency tier for application correctness; consider Session or Bounded Staleness if reads require freshness." \ + '. 
+= [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi + + mwl=$(echo "$detail" | jq -r '.enableMultipleWriteLocations // false') + loc_count=$(echo "$detail" | jq '[.locations[]?] | length') + if [[ "$mwl" == "true" ]] && [[ "${loc_count:-0}" -lt 2 ]]; then + jq --arg t "Cosmos DB \`${acct}\` has multi-region writes enabled with a single region" \ + --arg d "enableMultipleWriteLocations=true but locations count=${loc_count}." \ + --argjson s 2 \ + --arg n "Add a second write region or disable multi-region writes if not required." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi + + dk=$(echo "$detail" | jq -r '.disableKeyBasedMetadataWriteAccess // false') + if [[ "$dk" != "true" ]]; then + jq --arg t "Cosmos DB \`${acct}\` allows key-based metadata writes" \ + --arg d "disableKeyBasedMetadataWriteAccess is not true; account metadata can be changed with keys." \ + --argjson s 2 \ + --arg n "Consider setting disableKeyBasedMetadataWriteAccess and using Azure RBAC for control-plane changes." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi +done + +cat "$OUTPUT_FILE" diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-backup-policy.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-backup-policy.sh new file mode 100755 index 00000000..bb3de7e7 --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-backup-policy.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# Backup policy: periodic retention vs continuous backup presence. 
+# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=cosmosdb_common.sh +source "${SCRIPT_DIR}/cosmosdb_common.sh" + +: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}" + +OUTPUT_FILE="cosmosdb_backup_issues.json" +echo '[]' > "$OUTPUT_FILE" + +subscription="$(cosmosdb_resolve_subscription)" +[[ -z "$subscription" ]] && { cat "$OUTPUT_FILE"; exit 0; } # early exits still print the issue array, like the no-accounts exit below +az account set --subscription "$subscription" 2>/dev/null || { cat "$OUTPUT_FILE"; exit 0; } + +COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}" +mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER") +[[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]] && { echo '[]' > "$OUTPUT_FILE"; cat "$OUTPUT_FILE"; exit 0; } + +for acct in "${accounts[@]}"; do + [[ -z "$acct" ]] && continue + if ! detail=$(az cosmosdb show -g "$AZURE_RESOURCE_GROUP" -n "$acct" --subscription "$subscription" -o json 2>/dev/null); then + jq --arg t "Cannot read backup policy for \`${acct}\`" \ + --arg d "az cosmosdb show failed." \ + --argjson s 3 \ + --arg n "Verify Reader access." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + continue + fi + + btype=$(echo "$detail" | jq -r '.backupPolicy.type // "unknown"') + + if [[ "$btype" == "Continuous" ]]; then + # Continuous backup — good for PITR; optional check for migration window + continue + fi + + if [[ "$btype" == "Periodic" ]]; then + ret_h=$(echo "$detail" | jq -r '.backupPolicy.periodicModeProperties.backupRetentionIntervalInHours // 0 | tonumber') + if [[ "${ret_h:-0}" -lt 8 ]]; then + jq --arg t "Cosmos DB \`${acct}\` has short periodic backup retention" \ + --arg d "backupRetentionIntervalInHours=${ret_h} (below common 8h minimum for operational recovery)." \ + --argjson s 2 \ + --arg n "Increase backup retention or migrate to continuous backup for point-in-time restore." \ + '. 
+= [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi + continue + fi + + jq --arg t "Cosmos DB \`${acct}\` backup policy is unclear or missing" \ + --arg d "backupPolicy.type=${btype}" \ + --argjson s 3 \ + --arg n "Confirm backup is enabled in the Azure portal and API version supports backupPolicy." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" +done + +cat "$OUTPUT_FILE" diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-diagnostic-settings.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-diagnostic-settings.sh new file mode 100755 index 00000000..5497d626 --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-diagnostic-settings.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# Diagnostic settings (metrics / logs) destinations. +# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=cosmosdb_common.sh +source "${SCRIPT_DIR}/cosmosdb_common.sh" + +: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}" + +OUTPUT_FILE="cosmosdb_diagnostic_issues.json" +echo '[]' > "$OUTPUT_FILE" + +subscription="$(cosmosdb_resolve_subscription)" +[[ -z "$subscription" ]] && { cat "$OUTPUT_FILE"; exit 0; } # early exits still print the issue array, like the no-accounts exit below +az account set --subscription "$subscription" 2>/dev/null || { cat "$OUTPUT_FILE"; exit 0; } + +COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}" +mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER") +[[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]] && { echo '[]' > "$OUTPUT_FILE"; cat "$OUTPUT_FILE"; exit 0; } + +for acct in "${accounts[@]}"; do + [[ -z "$acct" ]] && continue + rid="$(cosmosdb_account_resource_id "$subscription" "$AZURE_RESOURCE_GROUP" "$acct")" + if ! 
settings=$(az monitor diagnostic-settings list --resource "$rid" --subscription "$subscription" -o json 2>/dev/null); then + jq --arg t "Cannot list diagnostic settings for \`${acct}\`" \ + --arg d "az monitor diagnostic-settings list failed for ${rid}" \ + --argjson s 3 \ + --arg n "Verify Monitoring Reader or equivalent on the subscription/resource." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + continue + fi + + n=$(echo "$settings" | jq 'if type == "array" then length else [.value[]?] | length end') + if [[ "${n:-0}" -eq 0 ]]; then + jq --arg t "Cosmos DB \`${acct}\` has no diagnostic settings" \ + --arg d "No diagnostic settings send metrics or logs to Log Analytics, storage, or Event Hub." \ + --argjson s 3 \ + --arg n "Configure diagnostic settings to stream control-plane metrics/logs for audit and troubleshooting." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi +done + +cat "$OUTPUT_FILE" diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-network-firewall.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-network-firewall.sh new file mode 100755 index 00000000..7c75244d --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-network-firewall.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# Public network access and IP firewall rules. 
+# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=cosmosdb_common.sh +source "${SCRIPT_DIR}/cosmosdb_common.sh" + +: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}" + +OUTPUT_FILE="cosmosdb_network_issues.json" +echo '[]' > "$OUTPUT_FILE" + +subscription="$(cosmosdb_resolve_subscription)" +[[ -z "$subscription" ]] && { cat "$OUTPUT_FILE"; exit 0; } # early exits still print the issue array, like the no-accounts exit below +az account set --subscription "$subscription" 2>/dev/null || { cat "$OUTPUT_FILE"; exit 0; } + +COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}" +mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER") +[[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]] && { echo '[]' > "$OUTPUT_FILE"; cat "$OUTPUT_FILE"; exit 0; } + +for acct in "${accounts[@]}"; do + [[ -z "$acct" ]] && continue + if ! detail=$(az cosmosdb show -g "$AZURE_RESOURCE_GROUP" -n "$acct" --subscription "$subscription" -o json 2>/dev/null); then + jq --arg t "Cannot read network settings for \`${acct}\`" \ + --arg d "az cosmosdb show failed." \ + --argjson s 3 \ + --arg n "Verify Reader access." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + continue + fi + + pub=$(echo "$detail" | jq -r '.publicNetworkAccess // "Enabled"') + ip_count=$(echo "$detail" | jq '[.ipRules[]?] | length') + open_ip=$(echo "$detail" | jq '[.ipRules[]? | select(.ipAddressOrRange == "0.0.0.0")] | length') + + if [[ "${open_ip:-0}" -gt 0 ]]; then + jq --arg t "Cosmos DB \`${acct}\` allows 0.0.0.0 in IP firewall" \ + --arg d "ipRules include 0.0.0.0, which accepts connections from all Azure datacenter IP ranges, including other customers' subscriptions." \ + --argjson s 3 \ + --arg n "Remove the 0.0.0.0 rule; restrict to known egress IPs or private endpoints." \ + '. 
+= [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi + + if [[ "$pub" == "Enabled" ]] && [[ "${ip_count:-0}" -eq 0 ]]; then + pe_count=$(echo "$detail" | jq '[.privateEndpointConnections[]?] | length') + if [[ "${pe_count:-0}" -eq 0 ]]; then + jq --arg t "Cosmos DB \`${acct}\` is reachable from public network without IP rules" \ + --arg d "publicNetworkAccess=Enabled and no ipRules; no private endpoints." \ + --argjson s 2 \ + --arg n "Disable public access and use private endpoints, or restrict with IP firewall / VNet integration." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi + fi +done + +cat "$OUTPUT_FILE" diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-private-endpoints.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-private-endpoints.sh new file mode 100755 index 00000000..8a009a5c --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-private-endpoints.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# Private Link: when public access is off, require approved private endpoints. 
+# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=cosmosdb_common.sh +source "${SCRIPT_DIR}/cosmosdb_common.sh" + +: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}" + +OUTPUT_FILE="cosmosdb_private_endpoint_issues.json" +echo '[]' > "$OUTPUT_FILE" + +subscription="$(cosmosdb_resolve_subscription)" +[[ -z "$subscription" ]] && { cat "$OUTPUT_FILE"; exit 0; } # early exits still print the issue array, like the no-accounts exit below +az account set --subscription "$subscription" 2>/dev/null || { cat "$OUTPUT_FILE"; exit 0; } + +COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}" +mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER") +[[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]] && { echo '[]' > "$OUTPUT_FILE"; cat "$OUTPUT_FILE"; exit 0; } + +for acct in "${accounts[@]}"; do + [[ -z "$acct" ]] && continue + if ! detail=$(az cosmosdb show -g "$AZURE_RESOURCE_GROUP" -n "$acct" --subscription "$subscription" -o json 2>/dev/null); then + jq --arg t "Cannot read private endpoint configuration for \`${acct}\`" \ + --arg d "az cosmosdb show failed." \ + --argjson s 3 \ + --arg n "Verify Reader access." \ + '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + continue + fi + + pub=$(echo "$detail" | jq -r '.publicNetworkAccess // "Enabled"') + if [[ "$pub" == "Enabled" ]]; then + continue + fi + + pe_n=$(echo "$detail" | jq '[.privateEndpointConnections[]?] | length') + if [[ "${pe_n:-0}" -eq 0 ]]; then + jq --arg t "Cosmos DB \`${acct}\` blocks public access but has no private endpoints" \ + --arg d "publicNetworkAccess=${pub}; privateEndpointConnections empty." \ + --argjson s 3 \ + --arg n "Create and approve a private endpoint for this account or re-enable controlled public access." \ + '. 
+= [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + continue + fi + + bad_pe=$(echo "$detail" | jq '[.privateEndpointConnections[]? | select((.privateLinkServiceConnectionState.status // "") != "Approved")] | length') + if [[ "${bad_pe:-0}" -gt 0 ]]; then + pend=$(echo "$detail" | jq -c '[.privateEndpointConnections[]? | select((.privateLinkServiceConnectionState.status // "") != "Approved")]') + jq --arg t "Cosmos DB \`${acct}\` has private endpoint connections not in Approved state" \ + --argjson d "$pend" \ + --argjson s 2 \ + --arg n "Approve or remove stale private endpoint connections in the Azure portal." \ + '. += [{title: $t, details: ($d | tostring), severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE" + fi +done + +cat "$OUTPUT_FILE" diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-resource-health.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-resource-health.sh new file mode 100755 index 00000000..7c99e3da --- /dev/null +++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-resource-health.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x +# ----------------------------------------------------------------------------- +# Resource Health (Microsoft.ResourceHealth) for Cosmos DB accounts. 
+# Writes JSON array of issues to cosmosdb_resource_health_issues.json
+# -----------------------------------------------------------------------------
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=cosmosdb_common.sh
+source "${SCRIPT_DIR}/cosmosdb_common.sh"
+
+: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}"  # abort with this message when unset or empty
+
+OUTPUT_FILE="cosmosdb_resource_health_issues.json"
+echo '[]' > "$OUTPUT_FILE"  # start from an empty issue list
+
+subscription="$(cosmosdb_resolve_subscription)"
+if [[ -z "$subscription" ]]; then  # nothing to query without a subscription: report it and stop
+  jq -n --arg t "Cannot resolve Azure subscription" \
+    --arg d "Set AZ_SUBSCRIPTION or log in with Azure CLI." \
+    --arg n "Verify AZ_SUBSCRIPTION and azure_credentials." \
+    '[{title: $t, details: $d, severity: 4, next_steps: $n}]' > "$OUTPUT_FILE"
+  cat "$OUTPUT_FILE"
+  exit 0
+fi
+
+az account set --subscription "$subscription" 2>/dev/null || {
+  jq -n --arg t "Failed to set Azure subscription" \
+    --arg d "az account set failed for subscription ${subscription}" \
+    --arg n "Verify credentials and subscription access." \
+    '[{title: $t, details: $d, severity: 4, next_steps: $n}]' > "$OUTPUT_FILE"
+  cat "$OUTPUT_FILE"
+  exit 0
+}
+
+# Read-only contract: the runbook task that runs this script is tagged
+# access:read-only, so the provider registration state is only read here.
+# The script deliberately does not call `az provider register` (a write against
+# the subscription) and does not sleep-poll for registration, which could use
+# up most of the task timeout before any issue is written. Any state other than
+# "Registered" is reported below so an operator can register the provider.
+reg_state=$(az provider show --namespace Microsoft.ResourceHealth --query "registrationState" -o tsv 2>/dev/null || echo "NotRegistered")
+# An empty answer from the CLI is treated the same as a failed lookup.
+reg_state="${reg_state:-NotRegistered}"
+
+if [[ "$reg_state" != "Registered" ]]; then
+  jq -n --arg t "Microsoft.ResourceHealth provider not registered" \
+    --arg d "Registration state: ${reg_state}" \
+    --arg n "Register Microsoft.ResourceHealth for the subscription or retry later." \
+    '[{title: $t, details: $d, severity: 3, next_steps: $n}]' > "$OUTPUT_FILE"
+  cat "$OUTPUT_FILE"
+  exit 0
+fi
+
+COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}"  # account name filter; defaults to "All"
+mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER")
+
+if [[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]]; then  # an empty account list is itself reported as an issue
+  jq -n --arg t "No Cosmos DB accounts found in resource group" \
+    --arg d "Resource group: ${AZURE_RESOURCE_GROUP}; filter: ${COSMOS_FILTER}" \
+    --arg n "Confirm account names, resource group, and subscription." \
+    '[{title: $t, details: $d, severity: 3, next_steps: $n}]' > "$OUTPUT_FILE"
+  cat "$OUTPUT_FILE"
+  exit 0
+fi
+
+for acct in "${accounts[@]}"; do
+  [[ -z "$acct" ]] && continue
+  url="https://management.azure.com/subscriptions/${subscription}/resourceGroups/${AZURE_RESOURCE_GROUP}/providers/Microsoft.DocumentDB/databaseAccounts/${acct}/providers/Microsoft.ResourceHealth/availabilityStatuses/current?api-version=2023-07-01-preview"
+  if ! health=$(az rest --method get --url "$url" -o json 2>/dev/null); then  # query failed: record a severity 3 issue and move on
+    jq --arg t "Resource Health query failed for \`${acct}\`" \
+      --arg d "Could not retrieve availability status from Resource Health API." \
+      --arg n "Verify Reader access and that the account exists." \
+      --argjson s 3 \
+      '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE"
+    continue
+  fi
+  title=$(echo "$health" | jq -r '.properties.title // "Unknown"')  # a missing title is read as "Unknown"
+  if [[ "$title" != "Available" ]]; then  # any title other than "Available" becomes a severity 2 issue
+    details=$(echo "$health" | jq -c '{title: .properties.title, reason: .properties.reasonType, summary: .properties.summary, occurred: .properties.occuredTime}')  # the response field is read as "occuredTime" (single r)
+    jq --arg t "Cosmos DB \`${acct}\` reports Resource Health: ${title}" \
+      --argjson d "$details" \
+      --argjson s 2 \
+      --arg n "Review Azure Service Health and Cosmos DB status; engage Azure support if outage persists." \
+      '. += [{title: $t, details: ($d | tostring), severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json && mv tmp.$$.json "$OUTPUT_FILE"
+  fi
+done
+
+echo "Wrote ${OUTPUT_FILE}"
+cat "$OUTPUT_FILE"
diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb-sli-dimensions.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb-sli-dimensions.sh
new file mode 100755
index 00000000..5a1d3a9f
--- /dev/null
+++ b/codebundles/azure-cosmosdb-config-health/cosmosdb-sli-dimensions.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# Lightweight combined dimensions for SLI (single pass per account).
+# Prints one JSON object to stdout: dimensions + aggregate (0-1).
+# -----------------------------------------------------------------------------
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=cosmosdb_common.sh
+source "${SCRIPT_DIR}/cosmosdb_common.sh"
+
+: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}"
+# Fallback payload: every dimension and the aggregate set to 0.
+empty_out() {
+  jq -n '{dimensions:{resource_health:0,api_consistency:0,backup:0,network:0,private_endpoints:0,diagnostics:0,activity:0},aggregate:0}'
+}
+
+subscription="$(cosmosdb_resolve_subscription)"
+if [[ -z "$subscription" ]]; then
+  empty_out
+  exit 0
+fi
+
+if ! az account set --subscription "$subscription" 2>/dev/null; then
+  empty_out
+  exit 0
+fi
+
+LOOKBACK_HOURS="${ACTIVITY_LOG_LOOKBACK_HOURS:-168}"  # activity-log window in hours
+OFFSET="${LOOKBACK_HOURS}h"  # value passed to `az monitor activity-log list --offset`
+COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}"
+mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER")
+
+if [[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]]; then
+  empty_out
+  exit 0
+fi
+
+sum_rh=0 sum_api=0 sum_bu=0 sum_net=0 sum_pe=0 sum_diag=0 sum_act=0  # per-dimension totals across accounts
+n=0  # number of accounts scored
+
+for acct in "${accounts[@]}"; do
+  [[ -z "$acct" ]] && continue
+  n=$((n + 1))
+  # resource_health: 1 only when the Resource Health title is "Available"; a failed query scores 0.
+  url="https://management.azure.com/subscriptions/${subscription}/resourceGroups/${AZURE_RESOURCE_GROUP}/providers/Microsoft.DocumentDB/databaseAccounts/${acct}/providers/Microsoft.ResourceHealth/availabilityStatuses/current?api-version=2023-07-01-preview"
+  rh=0
+  if health=$(az rest --method get --url "$url" -o json 2>/dev/null); then
+    t=$(echo "$health" | jq -r '.properties.title // ""')
+    [[ "$t" == "Available" ]] && rh=1 || rh=0
+  fi
+  # If the account itself cannot be read, only resource_health is credited; the other dimensions add 0 for it.
+  if ! detail=$(az cosmosdb show -g "$AZURE_RESOURCE_GROUP" -n "$acct" --subscription "$subscription" -o json 2>/dev/null); then
+    sum_rh=$((sum_rh + rh))
+    continue
+  fi
+  # api_consistency: 0 for Eventual default consistency, multi-write with fewer than 2 locations, or key-based metadata writes not disabled.
+  api=1
+  defcons=$(echo "$detail" | jq -r '.consistencyPolicy.defaultConsistencyLevel // ""')
+  [[ "$defcons" == "Eventual" ]] && api=0
+  mwl=$(echo "$detail" | jq -r '.enableMultipleWriteLocations // false')
+  loc_count=$(echo "$detail" | jq '[.locations[]?] | length')
+  [[ "$mwl" == "true" && "${loc_count:-0}" -lt 2 ]] && api=0
+  dk=$(echo "$detail" | jq -r '.disableKeyBasedMetadataWriteAccess // false')
+  [[ "$dk" != "true" ]] && api=0
+  # backup: Periodic needs at least 8 hours of retention; Continuous passes; any other type scores 0.
+  bu=1
+  btype=$(echo "$detail" | jq -r '.backupPolicy.type // "unknown"')
+  if [[ "$btype" == "Periodic" ]]; then
+    ret_h=$(echo "$detail" | jq -r '(.backupPolicy.periodicModeProperties.backupRetentionIntervalInHours // 0)')
+    [[ "${ret_h:-0}" -lt 8 ]] && bu=0
+  elif [[ "$btype" != "Continuous" ]]; then
+    bu=0
+  fi
+  # network: 0 when an ipRule equals 0.0.0.0, or public access is Enabled with no ipRules and no private endpoints.
+  net=1
+  pub=$(echo "$detail" | jq -r '.publicNetworkAccess // "Enabled"')
+  ip_count=$(echo "$detail" | jq '[.ipRules[]?] | length')
+  open_ip=$(echo "$detail" | jq '[.ipRules[]? | select(.ipAddressOrRange == "0.0.0.0")] | length')
+  [[ "${open_ip:-0}" -gt 0 ]] && net=0
+  if [[ "$pub" == "Enabled" && "${ip_count:-0}" -eq 0 ]]; then
+    pe_count=$(echo "$detail" | jq '[.privateEndpointConnections[]?] | length')
+    [[ "${pe_count:-0}" -eq 0 ]] && net=0
+  fi
+  # private_endpoints: only judged when public access is not Enabled; needs at least one connection and all of them Approved.
+  pe=1
+  if [[ "$pub" != "Enabled" ]]; then
+    pe_n=$(echo "$detail" | jq '[.privateEndpointConnections[]?] | length')
+    if [[ "${pe_n:-0}" -eq 0 ]]; then
+      pe=0
+    else
+      bad_pe=$(echo "$detail" | jq '[.privateEndpointConnections[]? | select((.privateLinkServiceConnectionState.status // "") != "Approved")] | length')
+      [[ "${bad_pe:-0}" -gt 0 ]] && pe=0
+    fi
+  fi
+  # diagnostics: 1 when at least one diagnostic setting is listed for the account; a failed listing scores 0.
+  diag=0
+  rid="$(cosmosdb_account_resource_id "$subscription" "$AZURE_RESOURCE_GROUP" "$acct")"
+  if settings=$(az monitor diagnostic-settings list --resource "$rid" --subscription "$subscription" -o json 2>/dev/null); then
+    dn=$(echo "$settings" | jq 'if type == "array" then length else [.value[]?] | length end')
+    [[ "${dn:-0}" -gt 0 ]] && diag=1
+  fi
+  # activity: 0 when any Administrative event is found in the lookback window (at most 50 events fetched); a failed query leaves it at 1.
+  act=1
+  if log_json=$(az monitor activity-log list --resource-id "$rid" --offset "$OFFSET" --max-events 50 --subscription "$subscription" -o json 2>/dev/null); then
+    acount=$(echo "$log_json" | jq '[.[] | select(
+      .category.value == "Administrative" or .category.localizedValue == "Administrative" or .category == "Administrative"
+    )] | length')
+    [[ "${acount:-0}" -gt 0 ]] && act=0
+  fi
+  # Add this account's seven scores to the running totals.
+  sum_rh=$((sum_rh + rh))
+  sum_api=$((sum_api + api))
+  sum_bu=$((sum_bu + bu))
+  sum_net=$((sum_net + net))
+  sum_pe=$((sum_pe + pe))
+  sum_diag=$((sum_diag + diag))
+  sum_act=$((sum_act + act))
+done
+
+if [[ "$n" -eq 0 ]]; then
+  empty_out
+  exit 0
+fi
+# Average each dimension over the n accounts, then average the 7 dimensions into the aggregate.
+jq -n \
+  --argjson sum_rh "$sum_rh" \
+  --argjson sum_api "$sum_api" \
+  --argjson sum_bu "$sum_bu" \
+  --argjson sum_net "$sum_net" \
+  --argjson sum_pe "$sum_pe" \
+  --argjson sum_diag "$sum_diag" \
+  --argjson sum_act "$sum_act" \
+  --argjson n "$n" \
+  '
+  ($sum_rh/$n) as $rh |
+  ($sum_api/$n) as $api |
+  ($sum_bu/$n) as $bu |
+  ($sum_net/$n) as $net |
+  ($sum_pe/$n) as $pe |
+  ($sum_diag/$n) as $diag |
+  ($sum_act/$n) as $act |
+  (($rh + $api + $bu + $net + $pe + $diag + $act) / 7) as $agg |
+  {dimensions:{resource_health:$rh, api_consistency:$api, backup:$bu, network:$net, private_endpoints:$pe, diagnostics:$diag, activity:$act}, aggregate:$agg}
+  '
diff --git a/codebundles/azure-cosmosdb-config-health/cosmosdb_common.sh b/codebundles/azure-cosmosdb-config-health/cosmosdb_common.sh
new file mode 100755
index 00000000..a349f35e
--- /dev/null
+++ b/codebundles/azure-cosmosdb-config-health/cosmosdb_common.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Shared helpers for Azure Cosmos DB configuration health scripts.
+# Print the subscription id to use: AZ_SUBSCRIPTION, else AZURE_RESOURCE_SUBSCRIPTION_ID, else the CLI's current account.
+cosmosdb_resolve_subscription() {
+  # Environment overrides win, in this order: AZ_SUBSCRIPTION, then
+  # AZURE_RESOURCE_SUBSCRIPTION_ID (unset and empty are treated alike).
+  local sub_id="${AZ_SUBSCRIPTION:-${AZURE_RESOURCE_SUBSCRIPTION_ID:-}}"
+  # Otherwise ask the CLI for its current account; a failing `az account show`
+  # is tolerated and simply leaves the id empty.
+  [[ -n "$sub_id" ]] || sub_id=$(az account show --query id -o tsv 2>/dev/null || true)
+  # The id is emitted without a trailing newline.
+  printf '%s' "$sub_id"
+}
+# Print account names, one per line: $1 subscription, $2 resource group, $3 optional name filter (default "All").
+cosmosdb_account_names() {
+  local subscription_id="$1" resource_group="$2" name_filter="${3:-All}" lowered
+  az account set --subscription "$subscription_id" 2>/dev/null || true  # best-effort; the list call below passes --subscription itself
+  lowered=$(echo "$name_filter" | tr '[:upper:]' '[:lower:]')
+  # A concrete name (anything but empty or a case-insensitive "all") is echoed back unchanged.
+  if [[ -n "$name_filter" && "$lowered" != "all" ]]; then
+    printf '%s\n' "$name_filter"
+    return
+  fi
+  az cosmosdb list -g "$resource_group" --subscription "$subscription_id" --query "[].name" -o tsv 2>/dev/null || true
+}
+# Print the ARM resource id of a Cosmos DB account (no trailing newline): $1 subscription, $2 resource group, $3 account name.
+cosmosdb_account_resource_id() {
+  local subscription_id="$1" resource_group="$2" account="$3"
+  printf '/subscriptions/%s/resourceGroups/%s/providers/Microsoft.DocumentDB/databaseAccounts/%s' "$subscription_id" "$resource_group" "$account"
+}
diff --git a/codebundles/azure-cosmosdb-config-health/runbook.robot b/codebundles/azure-cosmosdb-config-health/runbook.robot
new file mode 100644
index 00000000..3c939e9a
--- /dev/null
+++ b/codebundles/azure-cosmosdb-config-health/runbook.robot
@@ -0,0 +1,292 @@
+*** Settings ***
+Documentation    Validates Azure Cosmos DB account configuration for availability, security, recoverability, and operability using read-only Azure Resource Manager and monitoring APIs.
+Metadata    Author    rw-codebundle-agent
+Metadata    Display Name    Azure Cosmos DB Configuration Health
+Metadata    Supports    Azure Cosmos DB Configuration Health
+Force Tags    Azure    Cosmos DB    Configuration    Health    # NOTE(review): tag boundaries reconstructed from whitespace-collapsed text - confirm
+
+Library    String
+Library    BuiltIn
+Library    RW.Core
+Library    RW.CLI
+Library    RW.platform
+
+Suite Setup    Suite Initialization
+
+
+*** Tasks ***
+Check Cosmos DB Resource Health for Account `${COSMOSDB_ACCOUNT_NAME}` in Resource Group `${AZURE_RESOURCE_GROUP}`
+    [Documentation]    Uses Azure Resource Health to detect platform incidents or account-level availability problems for the scoped Cosmos DB account(s).
+    [Tags]    Azure    CosmosDB    ResourceHealth    access:read-only    data:config
+    ${result}=    RW.CLI.Run Bash File    # run the check script; it writes its issues to a JSON file
+    ...    bash_file=cosmosdb-resource-health.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    show_in_rwl_cheatsheet=true
+    ...    cmd_override=./cosmosdb-resource-health.sh
+    RW.Core.Add Pre To Report    ${result.stdout}
+    ${raw}=    RW.CLI.Run Cli    # read back the issue file written by the script
+    ...    cmd=cat cosmosdb_resource_health_issues.json
+    ...    env=${env}
+    ...    timeout_seconds=60
+    ...    include_in_history=false
+    TRY    # unparsable issue JSON is logged as a warning and treated as "no issues"
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for resource health task, defaulting to empty list.    WARN
+        ${issue_list}=    Create List
+    END
+    IF    len(@{issue_list}) > 0    # only iterate when at least one issue was parsed
+        FOR    ${issue}    IN    @{issue_list}    # one platform issue per JSON entry
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Cosmos DB should report Available in Azure Resource Health for accounts in `${AZURE_RESOURCE_GROUP}`
+            ...    actual=${issue['details']}
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+
+Check Cosmos DB API and Consistency Configuration for Account `${COSMOSDB_ACCOUNT_NAME}` in Resource Group `${AZURE_RESOURCE_GROUP}`
+    [Documentation]    Reviews default consistency, multi-region writes, and metadata write protection against common production baselines.
+    [Tags]    Azure    CosmosDB    Consistency    access:read-only    data:config
+    ${result}=    RW.CLI.Run Bash File    # same run / read back / parse / raise flow as the resource health task
+    ...    bash_file=cosmosdb-api-consistency-config.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=./cosmosdb-api-consistency-config.sh
+    RW.Core.Add Pre To Report    ${result.stdout}
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat cosmosdb_api_consistency_issues.json
+    ...    env=${env}
+    ...    timeout_seconds=60
+    ...    include_in_history=false
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for API consistency task, defaulting to empty list.    WARN
+        ${issue_list}=    Create List
+    END
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Cosmos DB API settings should align with workload consistency and security baselines
+            ...    actual=${issue['details']}
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+
+Check Cosmos DB Backup and Point-in-Time Settings for Account `${COSMOSDB_ACCOUNT_NAME}` in Resource Group `${AZURE_RESOURCE_GROUP}`
+    [Documentation]    Verifies periodic or continuous backup configuration and retention suitability for recovery objectives.
+    [Tags]    Azure    CosmosDB    Backup    access:read-only    data:config
+    ${result}=    RW.CLI.Run Bash File    # same run / read back / parse / raise flow as the resource health task
+    ...    bash_file=cosmosdb-backup-policy.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=./cosmosdb-backup-policy.sh
+    RW.Core.Add Pre To Report    ${result.stdout}
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat cosmosdb_backup_issues.json
+    ...    env=${env}
+    ...    timeout_seconds=60
+    ...    include_in_history=false
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for backup policy task, defaulting to empty list.    WARN
+        ${issue_list}=    Create List
+    END
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Backup policy should provide adequate retention and a supported backup mode
+            ...    actual=${issue['details']}
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+
+Check Cosmos DB Public Network Access and Firewall Rules for Account `${COSMOSDB_ACCOUNT_NAME}` in Resource Group `${AZURE_RESOURCE_GROUP}`
+    [Documentation]    Flags public exposure or overly permissive IP rules that conflict with zero-trust network patterns.
+    [Tags]    Azure    CosmosDB    Networking    access:read-only    data:config
+    ${result}=    RW.CLI.Run Bash File    # same run / read back / parse / raise flow as the resource health task
+    ...    bash_file=cosmosdb-network-firewall.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=./cosmosdb-network-firewall.sh
+    RW.Core.Add Pre To Report    ${result.stdout}
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat cosmosdb_network_issues.json
+    ...    env=${env}
+    ...    timeout_seconds=60
+    ...    include_in_history=false
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for network task, defaulting to empty list.    WARN
+        ${issue_list}=    Create List
+    END
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Network access should be restricted with private endpoints or explicit IP/VNet controls
+            ...    actual=${issue['details']}
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+
+Check Cosmos DB Private Endpoint Configuration for Account `${COSMOSDB_ACCOUNT_NAME}` in Resource Group `${AZURE_RESOURCE_GROUP}`
+    [Documentation]    Validates private link setup and connection approval when public network access is disabled.
+    [Tags]    Azure    CosmosDB    PrivateLink    access:read-only    data:config
+    ${result}=    RW.CLI.Run Bash File    # same run / read back / parse / raise flow as the resource health task
+    ...    bash_file=cosmosdb-private-endpoints.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=./cosmosdb-private-endpoints.sh
+    RW.Core.Add Pre To Report    ${result.stdout}
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat cosmosdb_private_endpoint_issues.json
+    ...    env=${env}
+    ...    timeout_seconds=60
+    ...    include_in_history=false
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for private endpoint task, defaulting to empty list.    WARN
+        ${issue_list}=    Create List
+    END
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Private endpoints should exist and be Approved when public access is disabled
+            ...    actual=${issue['details']}
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+
+Check Cosmos DB Diagnostic Settings for Account `${COSMOSDB_ACCOUNT_NAME}` in Resource Group `${AZURE_RESOURCE_GROUP}`
+    [Documentation]    Ensures metrics and logs are exported to Log Analytics, storage, or Event Hubs for troubleshooting and audit.
+    [Tags]    Azure    CosmosDB    Diagnostics    access:read-only    data:logs-config
+    ${result}=    RW.CLI.Run Bash File    # same run / read back / parse / raise flow as the resource health task
+    ...    bash_file=cosmosdb-diagnostic-settings.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=./cosmosdb-diagnostic-settings.sh
+    RW.Core.Add Pre To Report    ${result.stdout}
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat cosmosdb_diagnostic_issues.json
+    ...    env=${env}
+    ...    timeout_seconds=60
+    ...    include_in_history=false
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for diagnostic settings task, defaulting to empty list.    WARN
+        ${issue_list}=    Create List
+    END
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=At least one diagnostic setting should stream Cosmos DB platform telemetry
+            ...    actual=${issue['details']}
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+
+Check Cosmos DB Activity Log for Recent Configuration Changes to Account `${COSMOSDB_ACCOUNT_NAME}` in Resource Group `${AZURE_RESOURCE_GROUP}`
+    [Documentation]    Surfaces recent administrative mutations that may explain throughput, failover, networking, or backup behavior changes.
+    [Tags]    Azure    CosmosDB    ActivityLog    access:read-only    data:logs-config
+    ${result}=    RW.CLI.Run Bash File    # same run / read back / parse / raise flow as the resource health task
+    ...    bash_file=cosmosdb-activity-changes.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=./cosmosdb-activity-changes.sh
+    RW.Core.Add Pre To Report    ${result.stdout}
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat cosmosdb_activity_issues.json
+    ...    env=${env}
+    ...    timeout_seconds=60
+    ...    include_in_history=false
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for activity log task, defaulting to empty list.    WARN
+        ${issue_list}=    Create List
+    END
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Activity log should be reviewed when investigating unexpected Cosmos DB behavior
+            ...    actual=${issue['details']}
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+
+
+*** Keywords ***
+Suite Initialization
+    ${azure_credentials}=    RW.Core.Import Secret    # imported here; not placed into ${env} below
+    ...    azure_credentials
+    ...    type=string
+    ...    description=JSON or env-style secret with Azure service principal fields used by Azure CLI login patterns in this workspace.
+    ...    pattern=\w*
+    ${AZ_SUBSCRIPTION}=    RW.Core.Import User Variable    AZ_SUBSCRIPTION
+    ...    type=string
+    ...    description=Azure subscription ID (UUID) containing the Cosmos DB account.
+    ...    pattern=[a-fA-F0-9-]{36}
+    ${AZURE_RESOURCE_GROUP}=    RW.Core.Import User Variable    AZURE_RESOURCE_GROUP
+    ...    type=string
+    ...    description=Resource group containing the Cosmos DB account(s).
+    ...    pattern=\w*
+    ${COSMOSDB_ACCOUNT_NAME}=    RW.Core.Import User Variable    COSMOSDB_ACCOUNT_NAME
+    ...    type=string
+    ...    description=Cosmos DB account name, or All to scan every account in the resource group.
+    ...    default=All
+    ...    pattern=\w*
+    ${ACTIVITY_LOG_LOOKBACK_HOURS}=    RW.Core.Import User Variable    ACTIVITY_LOG_LOOKBACK_HOURS
+    ...    type=string
+    ...    description=Hours of activity log history to scan for administrative events.
+    ...    default=168
+    ...    pattern=^\d+$
+    Set Suite Variable    ${AZ_SUBSCRIPTION}    ${AZ_SUBSCRIPTION}
+    Set Suite Variable    ${AZURE_RESOURCE_GROUP}    ${AZURE_RESOURCE_GROUP}
+    Set Suite Variable    ${COSMOSDB_ACCOUNT_NAME}    ${COSMOSDB_ACCOUNT_NAME}
+    Set Suite Variable    ${ACTIVITY_LOG_LOOKBACK_HOURS}    ${ACTIVITY_LOG_LOOKBACK_HOURS}
+    Set Suite Variable    # JSON-formatted string of the variables handed to every script via env=${env}
+    ...    ${env}
+    ...    {"AZ_SUBSCRIPTION":"${AZ_SUBSCRIPTION}", "AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "COSMOSDB_ACCOUNT_NAME":"${COSMOSDB_ACCOUNT_NAME}", "ACTIVITY_LOG_LOOKBACK_HOURS":"${ACTIVITY_LOG_LOOKBACK_HOURS}"}
+    RW.CLI.Run Cli    # select the subscription once for the suite
+    ...    cmd=az account set --subscription ${AZ_SUBSCRIPTION}
+    ...    include_in_history=false
diff --git a/codebundles/azure-cosmosdb-config-health/sli.robot b/codebundles/azure-cosmosdb-config-health/sli.robot
new file mode 100644
index 00000000..bc1337dd
--- /dev/null
+++ b/codebundles/azure-cosmosdb-config-health/sli.robot
@@ -0,0 +1,104 @@
+*** Settings ***
+Documentation    Measures Azure Cosmos DB configuration health as a 0-1 score from Resource Health, API settings, backup, networking, private endpoints, diagnostics, and recent activity stability.
+Metadata    Author    rw-codebundle-agent
+Metadata    Display Name    Azure Cosmos DB Configuration Health
+Metadata    Supports    Azure Cosmos DB Configuration Health
+Force Tags    Azure    Cosmos DB    Configuration    Health    # NOTE(review): tag boundaries reconstructed from whitespace-collapsed text - confirm
+
+Library    String
+Library    BuiltIn
+Library    RW.Core
+Library    RW.CLI
+Library    RW.platform
+
+Suite Setup    Suite Initialization
+
+
+*** Tasks ***
+Score Cosmos DB Configuration Health for Account `${COSMOSDB_ACCOUNT_NAME}`
+    [Documentation]    Computes binary sub-scores per configuration dimension and publishes the aggregate 0-1 health metric for alerting.
+    [Tags]    Azure    CosmosDB    SLI    access:read-only    data:config
+    ${result}=    RW.CLI.Run Bash File    # the script prints one JSON object: {dimensions: {...}, aggregate: n}
+    ...    bash_file=cosmosdb-sli-dimensions.sh
+    ...    env=${env}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    show_in_rwl_cheatsheet=true
+    ...    cmd_override=./cosmosdb-sli-dimensions.sh
+    TRY    # unparsable output falls back to a payload that yields zeros below
+        ${payload}=    Evaluate    json.loads(r'''${result.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse SLI JSON, emitting zero score.    WARN
+        ${payload}=    Create Dictionary    dimensions=${EMPTY}    aggregate=0
+    END
+    TRY    # item access (${dict}[key]) needs no extra library; Collections (Get From Dictionary) is not imported by this suite
+        ${dims}=    Set Variable    ${payload}[dimensions]
+        ${rh}=    Set Variable    ${dims}[resource_health]
+        ${api}=    Set Variable    ${dims}[api_consistency]
+        ${bu}=    Set Variable    ${dims}[backup]
+        ${net}=    Set Variable    ${dims}[network]
+        ${pe}=    Set Variable    ${dims}[private_endpoints]
+        ${diag}=    Set Variable    ${dims}[diagnostics]
+        ${act}=    Set Variable    ${dims}[activity]
+    EXCEPT    # a missing key (or the empty fallback payload) zeroes every sub-metric
+        Log    SLI dimensions missing, defaulting to zeros.    WARN
+        ${rh}=    Set Variable    0
+        ${api}=    Set Variable    0
+        ${bu}=    Set Variable    0
+        ${net}=    Set Variable    0
+        ${pe}=    Set Variable    0
+        ${diag}=    Set Variable    0
+        ${act}=    Set Variable    0
+    END
+    RW.Core.Push Metric    ${rh}    sub_name=resource_health
+    RW.Core.Push Metric    ${api}    sub_name=api_consistency
+    RW.Core.Push Metric    ${bu}    sub_name=backup
+    RW.Core.Push Metric    ${net}    sub_name=network
+    RW.Core.Push Metric    ${pe}    sub_name=private_endpoints
+    RW.Core.Push Metric    ${diag}    sub_name=diagnostics
+    RW.Core.Push Metric    ${act}    sub_name=activity
+    TRY    # same library-free item access for the aggregate
+        ${health_score}=    Set Variable    ${payload}[aggregate]
+    EXCEPT
+        ${health_score}=    Set Variable    0
+    END
+    ${health_score}=    Convert To Number    ${health_score}    4    # round to 4 decimal places
+    RW.Core.Add to Report    Cosmos DB configuration health score: ${health_score}
+    RW.Core.Push Metric    ${health_score}
+
+
+*** Keywords ***
+Suite Initialization
+    ${azure_credentials}=    RW.Core.Import Secret    # imported here; not placed into ${env} below
+    ...    azure_credentials
+    ...    type=string
+    ...    description=JSON or env-style secret with Azure service principal fields used by Azure CLI login patterns in this workspace.
+    ...    pattern=\w*
+    ${AZ_SUBSCRIPTION}=    RW.Core.Import User Variable    AZ_SUBSCRIPTION
+    ...    type=string
+    ...    description=Azure subscription ID (UUID) containing the Cosmos DB account.
+    ...    pattern=[a-fA-F0-9-]{36}
+    ${AZURE_RESOURCE_GROUP}=    RW.Core.Import User Variable    AZURE_RESOURCE_GROUP
+    ...    type=string
+    ...    description=Resource group containing the Cosmos DB account(s).
+    ...    pattern=\w*
+    ${COSMOSDB_ACCOUNT_NAME}=    RW.Core.Import User Variable    COSMOSDB_ACCOUNT_NAME
+    ...    type=string
+    ...    description=Cosmos DB account name, or All to include every account in the resource group.
+    ...    default=All
+    ...    pattern=\w*
+    ${ACTIVITY_LOG_LOOKBACK_HOURS}=    RW.Core.Import User Variable    ACTIVITY_LOG_LOOKBACK_HOURS
+    ...    type=string
+    ...    description=Hours of activity log history used for the activity stability dimension.
+    ...    default=168
+    ...    pattern=^\d+$
+    Set Suite Variable    ${AZ_SUBSCRIPTION}    ${AZ_SUBSCRIPTION}
+    Set Suite Variable    ${AZURE_RESOURCE_GROUP}    ${AZURE_RESOURCE_GROUP}
+    Set Suite Variable    ${COSMOSDB_ACCOUNT_NAME}    ${COSMOSDB_ACCOUNT_NAME}
+    Set Suite Variable    ${ACTIVITY_LOG_LOOKBACK_HOURS}    ${ACTIVITY_LOG_LOOKBACK_HOURS}
+    Set Suite Variable    # JSON-formatted string of the variables handed to the script via env=${env}
+    ...    ${env}
+    ...    {"AZ_SUBSCRIPTION":"${AZ_SUBSCRIPTION}", "AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "COSMOSDB_ACCOUNT_NAME":"${COSMOSDB_ACCOUNT_NAME}", "ACTIVITY_LOG_LOOKBACK_HOURS":"${ACTIVITY_LOG_LOOKBACK_HOURS}"}
+    RW.CLI.Run Cli    # select the subscription once for the suite
+    ...    cmd=az account set --subscription ${AZ_SUBSCRIPTION}
+    ...    include_in_history=false