Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generation rules for the azure-cosmosdb-config-health CodeBundle (platform: azure).
# One rule: every matched Cosmos DB account produces an SLX, an SLI and a runbook.
apiVersion: runwhen.com/v1
kind: GenerationRules
spec:
platform: azure
generationRules:
# Only resources of this type are considered by the rule.
- resourceTypes:
- azure_cosmosdb_database_account
matchRules:
# The pattern ".+" is applied to the name property, so any non-empty name matches.
- type: pattern
pattern: ".+"
properties: [name]
mode: substring
slxs:
# NOTE(review): the SLI and runbook templates set COSMOSDB_ACCOUNT_NAME from the matched
# account, yet the qualifiers below do not include "resource". Presumably two accounts in
# the same resource group would then share one generated SLX name - confirm against the
# RunWhen generation-rule documentation.
- baseName: az-cosmosdb-config-health
qualifiers: ["resource_group", "subscription_id"]
baseTemplateName: azure-cosmosdb-config-health
levelOfDetail: basic
outputItems:
- type: slx
- type: sli
# The runbook item names its template file explicitly; the slx and sli items do not.
- type: runbook
templateName: azure-cosmosdb-config-health-taskset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# ServiceLevelIndicator template for the Cosmos DB configuration health score.
# This is a Jinja template: placeholders and include/if tags are expanded to text first,
# and only the rendered result is parsed as YAML.
apiVersion: runwhen.com/v1
kind: ServiceLevelIndicator
metadata:
name: {{slx_name}}
labels:
{% include "common-labels.yaml" %}
annotations:
{% include "common-annotations.yaml" %}
spec:
displayUnitsLong: Health Score
displayUnitsShort: score
locations:
- {{default_location}}
description: >-
0-1 configuration health score for Azure Cosmos DB account {{ match_resource.resource.name }}
in {{ resource_group.name }}.
codeBundle:
# repoUrl and ref use the supplied repo_url / ref values when they are set; otherwise they
# fall back to the runwhen-contrib rw-cli-codecollection repository and the main branch.
{% if repo_url %}
repoUrl: {{repo_url}}
{% else %}
repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
{% endif %}
{% if ref %}
ref: {{ref}}
{% else %}
ref: main
{% endif %}
pathToRobot: codebundles/azure-cosmosdb-config-health/sli.robot
intervalStrategy: intermezzo
intervalSeconds: 300
# Variables handed to sli.robot. The same four names are set in the runbook template.
configProvided:
- name: AZ_SUBSCRIPTION
value: "{{ subscription_id }}"
- name: AZURE_RESOURCE_GROUP
value: "{{ resource_group.name }}"
- name: COSMOSDB_ACCOUNT_NAME
value: "{{ match_resource.resource.name }}"
# The lookback window is fixed at 168 hours in this template.
- name: ACTIVITY_LOG_LOOKBACK_HOURS
value: "168"
# When wb_version is set the secrets come from the azure-auth.yaml include (skipped silently
# if that file is missing); otherwise a placeholder azure_credentials entry is emitted.
secretsProvided:
{% if wb_version %}
{% include "azure-auth.yaml" ignore missing %}
{% else %}
- name: azure_credentials
workspaceKey: AUTH DETAILS NOT FOUND
{% endif %}

alertConfig:
tasks:
persona: eager-edgar
sessionTTL: 10m
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# ServiceLevelX template for the Cosmos DB configuration health SLX.
# This is a Jinja template: placeholders and include tags are expanded to text first,
# and only the rendered result is parsed as YAML.
apiVersion: runwhen.com/v1
kind: ServiceLevelX
metadata:
name: {{ slx_name }}
labels:
{% include "common-labels.yaml" %}
annotations:
{% include "common-annotations.yaml" %}
spec:
imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/azure/databases/10137-icon-service-Azure-Cosmos-DB.svg
# NOTE(review): this template reads the resource group through match_resource.resource_group.name,
# while the SLI and runbook templates read resource_group.name - confirm both resolve to the same value.
alias: >-
{{ match_resource.resource_group.name }} Cosmos DB {{ match_resource.resource.name }} Configuration Health
asMeasuredBy: Composite configuration score (Resource Health, API, backup, network, private link, diagnostics, activity).
# Placeholder entry only; the account-specific variables are set in the SLI and runbook templates.
configProvided:
- name: SLX_PLACEHOLDER
value: SLX_PLACEHOLDER
owners:
- {{ workspace.owner_email }}
statement: >-
Monitor Azure Cosmos DB account configuration for availability signals, security-related settings,
backup posture, network exposure, private endpoints, diagnostic exports, and recent administrative changes
in resource group {{ match_resource.resource_group.name }}.
additionalContext:
{% include "azure-hierarchy.yaml" ignore missing %}
qualified_name: "{{ match_resource.qualified_name }}"
# Tags from the optional azure-tags.yaml include come first, followed by four fixed tags.
tags:
{% include "azure-tags.yaml" ignore missing %}
- name: cloud
value: azure
- name: service
value: cosmosdb
- name: scope
value: resource-group
- name: access
value: read-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Runbook template that points at runbook.robot for one Cosmos DB account.
# This is a Jinja template: placeholders and include/if tags are expanded to text first,
# and only the rendered result is parsed as YAML.
apiVersion: runwhen.com/v1
kind: Runbook
metadata:
name: {{slx_name}}
labels:
{% include "common-labels.yaml" %}
annotations:
{% include "common-annotations.yaml" %}
spec:
location: {{default_location}}
description: >-
Validates Azure Cosmos DB configuration health for account {{ match_resource.resource.name }}
in resource group {{ resource_group.name }} (subscription {{ subscription_name }}).
codeBundle:
# repoUrl and ref use the supplied repo_url / ref values when they are set; otherwise they
# fall back to the runwhen-contrib rw-cli-codecollection repository and the main branch.
{% if repo_url %}
repoUrl: {{repo_url}}
{% else %}
repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
{% endif %}
{% if ref %}
ref: {{ref}}
{% else %}
ref: main
{% endif %}
pathToRobot: codebundles/azure-cosmosdb-config-health/runbook.robot
# Variables handed to runbook.robot. The same four names are set in the SLI template.
configProvided:
- name: AZ_SUBSCRIPTION
value: "{{ subscription_id }}"
- name: AZURE_RESOURCE_GROUP
value: "{{ resource_group.name }}"
- name: COSMOSDB_ACCOUNT_NAME
value: "{{ match_resource.resource.name }}"
# The lookback window is fixed at 168 hours in this template.
- name: ACTIVITY_LOG_LOOKBACK_HOURS
value: "168"
# When wb_version is set the secrets come from the azure-auth.yaml include (skipped silently
# if that file is missing); otherwise a placeholder azure_credentials entry is emitted.
secretsProvided:
{% if wb_version %}
{% include "azure-auth.yaml" ignore missing %}
{% else %}
- name: azure_credentials
workspaceKey: AUTH DETAILS NOT FOUND
{% endif %}
12 changes: 12 additions & 0 deletions codebundles/azure-cosmosdb-config-health/.test/Taskfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Taskfile for checking this CodeBundle's file layout.
version: "3"

tasks:
# Runs ./validate-bundle-structure.sh, which reports any expected bundle file that is absent.
default:
desc: "Validate CodeBundle structure"
cmds:
- ./validate-bundle-structure.sh

# NOTE(review): the validator script in this change creates no .validator-tmp.* files -
# confirm this cleanup task is still needed.
clean:
desc: "Remove local validator temp files"
cmds:
- rm -f .validator-tmp.*
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Checks that the CodeBundle directory (the parent of the directory holding this
# script) contains every file the bundle is expected to ship.
#
# Each absent path is reported on stderr as "missing: <path>". The exit status
# is 1 when at least one path is absent and 0 otherwise.
set -euo pipefail

bundle_root="$(cd "$(dirname "$0")/.." && pwd)"
cd "$bundle_root"

# Paths are relative to the bundle root and are checked in this order.
required_paths=(
  runbook.robot
  sli.robot
  README.md
  .runwhen/generation-rules/azure-cosmosdb-config-health.yaml
  .runwhen/templates/azure-cosmosdb-config-health-slx.yaml
  .runwhen/templates/azure-cosmosdb-config-health-taskset.yaml
  .runwhen/templates/azure-cosmosdb-config-health-sli.yaml
  cosmosdb-resource-health.sh
  cosmosdb-api-consistency-config.sh
  cosmosdb-backup-policy.sh
  cosmosdb-network-firewall.sh
  cosmosdb-private-endpoints.sh
  cosmosdb-diagnostic-settings.sh
  cosmosdb-activity-changes.sh
  cosmosdb-sli-dimensions.sh
  cosmosdb_common.sh
)

status=0
for path in "${required_paths[@]}"; do
  # Keep scanning after a miss so that every absent file is listed in one run.
  [[ -e "$path" ]] && continue
  echo "missing: $path" >&2
  status=1
done

exit "$status"
65 changes: 65 additions & 0 deletions codebundles/azure-cosmosdb-config-health/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Azure Cosmos DB Configuration Health

This CodeBundle validates Azure Cosmos DB account settings that affect availability, security, recoverability, and operability. It uses read-only Azure Resource Manager, Resource Health, monitoring, and activity log APIs—aligned with **Reader**-style access plus diagnostic read permissions where required.

## Overview

- **Resource Health**: Detects Azure platform or account-level availability states that are not `Available`.
- **API and consistency**: Flags `Eventual` default consistency, inconsistent multi-region write configuration, and accounts that still allow key-based metadata writes.
- **Backup**: Ensures a supported backup mode (periodic with reasonable retention or continuous).
- **Public network and firewall**: Surfaces wide-open public access patterns and `0.0.0.0` firewall rules.
- **Private endpoints**: When public access is disabled, verifies private endpoints exist and are `Approved`.
- **Diagnostic settings**: Confirms at least one diagnostic setting exports telemetry.
- **Activity log**: Summarizes recent administrative events for the account to support change correlation.

Pair with utilization-focused bundles (for example RU and latency metrics) for a full operational picture.

## Configuration

### Required variables

- `AZ_SUBSCRIPTION`: Azure subscription ID (UUID) for the Cosmos DB account.
- `AZURE_RESOURCE_GROUP`: Resource group that contains the account (or accounts when scanning `All`).

### Optional variables

- `COSMOSDB_ACCOUNT_NAME`: Cosmos DB account name. Set to `All` (default) to evaluate every account in the resource group.
- `ACTIVITY_LOG_LOOKBACK_HOURS`: Hours of activity log history to scan for administrative events (default: `168`).

### Secrets

- `azure_credentials`: Service principal or workspace secret in the format expected by your Azure CLI login flow (commonly `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`, and subscription context). Match the pattern used by other Azure CLI CodeBundles in the workspace.

## Tasks overview

### Check Cosmos DB Resource Health

Queries `Microsoft.ResourceHealth/availabilityStatuses/current` for each scoped account and raises issues when the reported title is not `Available`.

### Check Cosmos DB API and Consistency Configuration

Reads `az cosmosdb show` output to evaluate default consistency, multi-region write flags versus region count, and metadata write protection.

### Check Cosmos DB Backup and Point-in-Time Settings

Validates periodic backup retention (minimum eight hours) or continuous backup mode.

### Check Cosmos DB Public Network Access and Firewall Rules

Detects public network exposure without compensating IP or private-link controls and flags `0.0.0.0` rules.

### Check Cosmos DB Private Endpoint Configuration

When public access is disabled, requires private endpoints in an `Approved` state.

### Check Cosmos DB Diagnostic Settings

Lists Azure Monitor diagnostic settings for the account resource ID and flags a missing configuration.

### Check Cosmos DB Activity Log for Recent Configuration Changes

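Lists recent administrative activity log events scoped to the account for change awareness. At most 100 events are fetched per account, and the issue details list up to the first 15 administrative events (timestamp, operation, and status).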

## Service Level Indicator

`sli.robot` emits a 0–1 score averaged across seven configuration dimensions using `cosmosdb-sli-dimensions.sh`. Sub-metrics are published per dimension for dashboard drill-down.
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env bash
set -euo pipefail
set -x
# -----------------------------------------------------------------------------
# Recent Administrative activity log events for the account (config mutations).
#
# Environment:
#   AZURE_RESOURCE_GROUP         (required) resource group to scan.
#   COSMOSDB_ACCOUNT_NAME        (optional) account filter handed to
#                                cosmosdb_account_names; defaults to "All".
#   ACTIVITY_LOG_LOOKBACK_HOURS  (optional) whole hours of history; defaults to
#                                168. A non-numeric value falls back to 168.
#
# Output:
#   A JSON array of {title, details, severity, next_steps} objects written to
#   cosmosdb_activity_issues.json in the current directory and printed to
#   stdout. When the subscription cannot be resolved or selected the script
#   exits 0 after writing only the empty array (nothing is printed).
# -----------------------------------------------------------------------------

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=cosmosdb_common.sh
source "${SCRIPT_DIR}/cosmosdb_common.sh"

: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}"

OUTPUT_FILE="cosmosdb_activity_issues.json"
echo '[]' > "$OUTPUT_FILE"

# Append one issue object to OUTPUT_FILE.
# Args: $1 title, $2 details (string), $3 severity (JSON number), $4 next steps.
# The new array is written to a temp file first and moved into place only when
# jq succeeded, so a failed jq run never truncates the existing output.
activity_add_issue() {
  if jq --arg t "$1" --arg d "$2" --argjson s "$3" --arg n "$4" \
      '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' \
      "$OUTPUT_FILE" > "tmp.$$.json"; then
    mv "tmp.$$.json" "$OUTPUT_FILE"
  fi
}

LOOKBACK_HOURS="${ACTIVITY_LOG_LOOKBACK_HOURS:-168}"
# A malformed value would make every az call fail and be reported as an access
# problem, so fall back to the documented default instead.
if [[ ! "$LOOKBACK_HOURS" =~ ^[0-9]+$ ]]; then
  echo "ACTIVITY_LOG_LOOKBACK_HOURS='${LOOKBACK_HOURS}' is not a whole number of hours; using 168" >&2
  LOOKBACK_HOURS=168
fi
# az monitor activity-log --offset expects formats like 168h
OFFSET="${LOOKBACK_HOURS}h"

subscription="$(cosmosdb_resolve_subscription)"
[[ -z "$subscription" ]] && exit 0
az account set --subscription "$subscription" 2>/dev/null || exit 0

COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}"
mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER")
[[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]] && { echo '[]' > "$OUTPUT_FILE"; cat "$OUTPUT_FILE"; exit 0; }

for acct in "${accounts[@]}"; do
  [[ -z "$acct" ]] && continue
  rid="$(cosmosdb_account_resource_id "$subscription" "$AZURE_RESOURCE_GROUP" "$acct")"
  if ! log_json=$(az monitor activity-log list --resource-id "$rid" --offset "$OFFSET" --max-events 100 --subscription "$subscription" -o json 2>/dev/null); then
    activity_add_issue \
      "Cannot read activity log for \`${acct}\`" \
      "az monitor activity-log list failed." \
      3 \
      "Verify Reader access on the subscription and resource."
    continue
  fi

  # .category may be an object ({value, localizedValue}) or a plain string.
  # Indexing a string with .value is a jq error, so branch on the type instead
  # of chaining the three comparisons with "or".
  admin_events=$(echo "$log_json" | jq '[.[] | select(
    .category as $c
    | if ($c | type) == "object"
      then ($c.value == "Administrative" or $c.localizedValue == "Administrative")
      else $c == "Administrative"
      end
  )]')
  count=$(echo "$admin_events" | jq 'length')
  if [[ "${count:-0}" -eq 0 ]]; then
    continue
  fi

  # Details carry at most the first 15 events, serialized as a compact JSON string.
  sample=$(echo "$admin_events" | jq -r '[.[:15][] | {time: .eventTimestamp, op: .operationName.localizedValue, status: .status.localizedValue}] | tostring')
  activity_add_issue \
    "Cosmos DB \`${acct}\` has ${count} administrative activity log events in last ${LOOKBACK_HOURS}h" \
    "$sample" \
    3 \
    "Review who changed throughput, failover, networking, or backup; correlate with incidents or change windows."
done

cat "$OUTPUT_FILE"
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bash
set -euo pipefail
set -x
# -----------------------------------------------------------------------------
# Compares each Cosmos DB account's API / consistency / capability flags with
# common production baselines and records every deviation as an issue.
#
# Environment:
#   AZURE_RESOURCE_GROUP   (required) resource group to scan.
#   COSMOSDB_ACCOUNT_NAME  (optional) account filter handed to
#                          cosmosdb_account_names; defaults to "All".
#
# Output:
#   A JSON array of {title, details, severity, next_steps} objects written to
#   cosmosdb_api_consistency_issues.json and printed to stdout.
# -----------------------------------------------------------------------------

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=cosmosdb_common.sh
source "${SCRIPT_DIR}/cosmosdb_common.sh"

: "${AZURE_RESOURCE_GROUP:?AZURE_RESOURCE_GROUP is required}"

OUTPUT_FILE="cosmosdb_api_consistency_issues.json"
echo '[]' > "$OUTPUT_FILE"

# Append one issue object to OUTPUT_FILE.
# Args: $1 title, $2 details, $3 severity (JSON number), $4 next steps.
# The temp file replaces OUTPUT_FILE only when jq succeeded.
_api_record_issue() {
  if jq --arg t "$1" \
      --arg d "$2" \
      --argjson s "$3" \
      --arg n "$4" \
      '. += [{title: $t, details: $d, severity: $s, next_steps: $n}]' "$OUTPUT_FILE" > tmp.$$.json; then
    mv tmp.$$.json "$OUTPUT_FILE"
  fi
}

subscription="$(cosmosdb_resolve_subscription)"
if [[ -z "$subscription" ]]; then
  exit 0
fi
if ! az account set --subscription "$subscription" 2>/dev/null; then
  exit 0
fi

COSMOS_FILTER="${COSMOSDB_ACCOUNT_NAME:-All}"
mapfile -t accounts < <(cosmosdb_account_names "$subscription" "$AZURE_RESOURCE_GROUP" "$COSMOS_FILTER")
if [[ ${#accounts[@]} -eq 0 || -z "${accounts[0]:-}" ]]; then
  # Nothing to inspect: emit the empty issue list and stop.
  echo '[]' > "$OUTPUT_FILE"
  cat "$OUTPUT_FILE"
  exit 0
fi

for account in "${accounts[@]}"; do
  if [[ -z "$account" ]]; then
    continue
  fi

  if ! account_json=$(az cosmosdb show -g "$AZURE_RESOURCE_GROUP" -n "$account" --subscription "$subscription" -o json 2>/dev/null); then
    _api_record_issue \
      "Cannot read Cosmos DB account \`${account}\`" \
      "az cosmosdb show failed." \
      3 \
      "Verify RBAC Reader on the account."
    continue
  fi

  # Check 1: default consistency level.
  consistency=$(jq -r '.consistencyPolicy.defaultConsistencyLevel // ""' <<< "$account_json")
  if [[ "$consistency" == "Eventual" ]]; then
    _api_record_issue \
      "Cosmos DB \`${account}\` uses Eventual default consistency" \
      "defaultConsistencyLevel=${consistency}. Many production workloads expect Session or stronger guarantees." \
      2 \
      "Re-evaluate consistency tier for application correctness; consider Session or Bounded Staleness if reads require freshness."
  fi

  # Check 2: multi-region writes enabled while fewer than two locations exist.
  multi_write=$(jq -r '.enableMultipleWriteLocations // false' <<< "$account_json")
  region_count=$(jq '[.locations[]?] | length' <<< "$account_json")
  if [[ "$multi_write" == "true" && "${region_count:-0}" -lt 2 ]]; then
    _api_record_issue \
      "Cosmos DB \`${account}\` has multi-region writes enabled with a single region" \
      "enableMultipleWriteLocations=true but locations count=${region_count}." \
      2 \
      "Add a second write region or disable multi-region writes if not required."
  fi

  # Check 3: key-based metadata writes are still allowed.
  key_writes_disabled=$(jq -r '.disableKeyBasedMetadataWriteAccess // false' <<< "$account_json")
  if [[ "$key_writes_disabled" != "true" ]]; then
    _api_record_issue \
      "Cosmos DB \`${account}\` allows key-based metadata writes" \
      "disableKeyBasedMetadataWriteAccess is not true; account metadata can be changed with keys." \
      2 \
      "Consider setting disableKeyBasedMetadataWriteAccess and using Azure RBAC for control-plane changes."
  fi
done

cat "$OUTPUT_FILE"
Loading