From d15a1292606d99f0be978cd0465674d65d759cf7 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 30 Jul 2025 11:39:27 +0530 Subject: [PATCH 01/18] feat: add Azure planned maintenance events retrieval script --- .../maintenance-event.sh | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100755 codebundles/azure-planned-maintenance/maintenance-event.sh diff --git a/codebundles/azure-planned-maintenance/maintenance-event.sh b/codebundles/azure-planned-maintenance/maintenance-event.sh new file mode 100755 index 000000000..5ec335ee2 --- /dev/null +++ b/codebundles/azure-planned-maintenance/maintenance-event.sh @@ -0,0 +1,158 @@ +#!/bin/bash +set -euo pipefail + +# ----------------------------------------------------------------------------- +# Script: get_maintenance_events.sh +# Purpose: Fetches planned maintenance events from Azure Service Health and +# their impacted resources for the specified subscription. +# +# Inputs (Environment Variables): +# AZURE_SUBSCRIPTION_ID (Required): Azure Subscription ID. +# OUTPUT_DIR (Optional): Directory to save the output JSON file. +# +# Outputs: +# File: ${OUTPUT_DIR}/maintenance_events.json +# Contains an array of Azure Planned Maintenance events with parsed impact data. +# ----------------------------------------------------------------------------- + +# Get or set subscription ID +if [ -z "$AZURE_SUBSCRIPTION_ID" ]; then + subscription=$(az account show --query "id" -o tsv) + echo "AZURE_SUBSCRIPTION_ID is not set. 
Using current subscription ID: $subscription" +else + subscription="$AZURE_SUBSCRIPTION_ID" + echo "Using specified subscription ID: $subscription" +fi + +# Set the subscription ID +echo "Switching to subscription ID: $subscription" +az account set --subscription "$subscription" || { echo "Failed to set subscription."; exit 1; } + +OUTPUT_DIR="${OUTPUT_DIR:-.}" +output_file="${OUTPUT_DIR}/maintenance_events.json" +temp_file="${OUTPUT_DIR}/temp_events.json" + +# Ensure output directory exists +mkdir -p "$OUTPUT_DIR" + +echo "--- Starting Azure Planned Maintenance Event Retrieval ---" +echo "Subscription ID: $subscription" +echo "Output Directory: $OUTPUT_DIR" +echo "Output File: $output_file" + +# Check for required Azure CLI extensions +check_extension() { + local extension=$1 + echo "Checking for '$extension' Azure CLI extension..." + if ! az extension show --name "$extension" &>/dev/null; then + echo "Installing '$extension' extension..." + az extension add --name "$extension" --yes || { + echo "ERROR: Failed to install '$extension' Azure CLI extension." >&2 + exit 1 + } + echo "'$extension' extension installed successfully." + else + echo "'$extension' extension is already installed." + fi +} + +# Install required extensions +check_extension "resource-graph" +check_extension "account" + +# Function to parse impact data into a proper JSON object +parse_impact_data() { + local impact_json=$1 + # Parse the JSON string into a proper JSON object + echo "$impact_json" | jq -r '.' 2>/dev/null || echo '[]' +} + +# Query for planned maintenance events +echo "Fetching planned maintenance events from Azure..." 
+query=" +ServiceHealthResources +| where type =~ 'microsoft.resourcehealth/events' +| extend + eventType = tostring(properties.EventType), + status = tostring(properties.Status), + description = tostring(properties.Title), + trackingId = tostring(properties.TrackingId), + summary = tostring(properties.Summary), + level = tostring(properties.Level), + impact = properties.Impact, + impactStartTime = todatetime(properties.ImpactStartTime), + impactMitigationTime = todatetime(properties.ImpactMitigationTime) +| where eventType == 'PlannedMaintenance' +| project + subscriptionId, + trackingId, + eventType, + status, + summary, + description, + level, + impactStartTime, + impactMitigationTime, + id, + impact +| order by impactStartTime asc +" + +echo "Executing query to get maintenance events..." +if ! events_result=$(az graph query -q "$query" --subscriptions "$subscription" -o json 2>/dev/null); then + echo "ERROR: Failed to retrieve planned maintenance events from Azure." >&2 + echo "[]" > "$output_file" + exit 1 +fi + +# Process the results +echo "Processing results..." +processed_events=() +count=$(echo "$events_result" | jq -r '.data | length' 2>/dev/null || echo "0") + +echo "Found $count planned maintenance events." + +for ((i=0; i/dev/null) + + # Extract basic event info + base_event=$(echo "$event" | jq '{ + subscriptionId, + trackingId, + eventType, + status, + summary, + description, + level, + impactStartTime, + impactMitigationTime, + id + }') + + # Process impact data + impact_json=$(echo "$event" | jq -r '.impact' 2>/dev/null) + impact_details=$(parse_impact_data "$impact_json" 2>/dev/null) + + # Combine base event with parsed impact + processed_event=$(echo "$base_event" | jq --argjson impact "$impact_details" ' + . 
+ { + impactDetails: $impact + } + ') + + processed_events+=("$processed_event") +done + +# Combine all events into a JSON array +result_json=$(printf '%s\n' "${processed_events[@]}" | jq -s '.') + +# Save to output file +echo "$result_json" > "$output_file" + +# Clean up +rm -f "$temp_file" 2>/dev/null || true + +echo "Results saved to $output_file" +echo "--- Azure Planned Maintenance Event Retrieval Finished ---" + +exit 0 \ No newline at end of file From c62f0f5be36092e513a19c13429297f78fc8a31d Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 30 Jul 2025 13:17:32 +0530 Subject: [PATCH 02/18] add README and Taskfile for Azure planned maintenance codebundle --- .../azure-planned-maintenance/.test/README.md | 95 ++++ .../.test/Taskfile.yaml | 416 ++++++++++++++++++ 2 files changed, 511 insertions(+) create mode 100644 codebundles/azure-planned-maintenance/.test/README.md create mode 100644 codebundles/azure-planned-maintenance/.test/Taskfile.yaml diff --git a/codebundles/azure-planned-maintenance/.test/README.md b/codebundles/azure-planned-maintenance/.test/README.md new file mode 100644 index 000000000..230f4e951 --- /dev/null +++ b/codebundles/azure-planned-maintenance/.test/README.md @@ -0,0 +1,95 @@ +### How to test this codebundle? 
+ +#### Azure service principal Configuration + +We create two distinct Azure service principal with scoped access: + + +**CloudCustodian Service principal** + +Purpose: Service Level Indicator (SLI) monitoring and runbook automation and configured with read only access principles + +```sh +AZURE_SUBSCRIPTION_ID="" +az ad sp create-for-rbac --name c7n --role reader --scopes /subscriptions/${AZURE_SUBSCRIPTION_ID} +``` + +**Infrastructure Deployment Service principal** +Purpose: Cloud infrastructure provisioning and management using Terraform + +```sh +AZURE_SUBSCRIPTION_ID="" +az ad sp create-for-rbac --name provisioner --role contributor --scopes /subscriptions/${AZURE_SUBSCRIPTION_ID} +``` + +# Infrastructure Setup +The terraform directory contains infrastructure used for testing. + + +#### Credential Setup + +Navigate to the `.test/terraform` directory and configure two secret files for authentication: + +`cb.secret` - CloudCustodian and RunWhen Credentials + +Create this file with the following environment variables: + + ```sh + export RW_PAT="" + export RW_WORKSPACE="" + export RW_API_URL="papi.beta.runwhen.com" + + export ARM_SUBSCRIPTION_ID="" + export AZ_TENANT_ID="" + export AZ_CLIENT_SECRET="" + export AZ_CLIENT_ID="" + ``` + + +`tf.secret` - Terraform Deployment Credentials + +Create this file with the following environment variables: + + ```sh + export ARM_SUBSCRIPTION_ID="" + export AZ_TENANT_ID="" + export AZ_CLIENT_SECRET="" + export AZ_CLIENT_ID="" + ``` + + +# Local Development Testing + +Perform an azure login on the command line to interact with the infrastructure provisioned by Terraform. + + +```sh +az login --service-principal \ + --username "" \ + --password "" \ + --tenant "" +``` + +#### Testing Workflow + +1. Build test infra: + ```sh + task build-infra + ``` + +2. Generate RunWhen Configurations + ```sh + tasks + ``` + +3. Upload generated SLx to RunWhen Platform + + ```sh + task upload-slxs + ``` + +4. 
At last, after testing, clean up the test infrastructure. + + ```sh + task clean + ``` diff --git a/codebundles/azure-planned-maintenance/.test/Taskfile.yaml b/codebundles/azure-planned-maintenance/.test/Taskfile.yaml new file mode 100644 index 000000000..17093c4a6 --- /dev/null +++ b/codebundles/azure-planned-maintenance/.test/Taskfile.yaml @@ -0,0 +1,416 @@ +version: "3" + +tasks: + default: + desc: "Run/refresh config" + cmds: + - task: check-unpushed-commits + - task: generate-rwl-config + - task: run-rwl-discovery + + clean: + desc: "Run cleanup tasks" + cmds: + - task: check-and-cleanup-terraform + - task: delete-slxs + - task: clean-rwl-discovery + + build-infra: + desc: "Build test infrastructure" + cmds: + - task: build-terraform-infra + + check-unpushed-commits: + desc: Check if outstanding commits or file updates need to be pushed before testing. + vars: + # Specify the base directory relative to your Taskfile location + BASE_DIR: "../" + cmds: + - | + echo "Checking for uncommitted changes in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + UNCOMMITTED_FILES=$(git diff --name-only HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNCOMMITTED_FILES" ]; then + echo "✗" + echo "Uncommitted changes found:" + echo "$UNCOMMITTED_FILES" + echo "Remember to commit & push changes before executing the `run-rwl-discovery` task." + echo "------------" + exit 1 + else + echo "√" + echo "No uncommitted changes in specified directories." + echo "------------" + fi + - | + echo "Checking for unpushed commits in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + git fetch origin + UNPUSHED_FILES=$(git diff --name-only origin/$(git rev-parse --abbrev-ref HEAD) HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNPUSHED_FILES" ]; then + echo "✗" + echo "Unpushed commits found:" + echo "$UNPUSHED_FILES" + echo "Remember to push changes before executing the `run-rwl-discovery` task." 
+ echo "------------" + exit 1 + else + echo "√" + echo "No unpushed commits in specified directories." + echo "------------" + fi + silent: true + generate-rwl-config: + desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" + env: + ARM_SUBSCRIPTION_ID: "{{.ARM_SUBSCRIPTION_ID}}" + AZ_TENANT_ID: "{{.AZ_TENANT_ID}}" + AZ_CLIENT_SECRET: "{{.AZ_CLIENT_SECRET}}" + AZ_CLIENT_ID: "{{.AZ_CLIENT_ID}}" + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + cmds: + - | + source terraform/tf.secret + repo_url=$(git config --get remote.origin.url) + branch_name=$(git rev-parse --abbrev-ref HEAD) + codebundle=$(basename "$(dirname "$PWD")") + AZURE_SUBSCRIPTION_ID=$ARM_SUBSCRIPTION_ID + subscription_name=$(az account show --subscription ${AZURE_SUBSCRIPTION_ID} --query name -o tsv) + + # Check if AZ_RESOURCE_GROUP is set, otherwise get from Terraform state + if [ -z "${AZ_RESOURCE_GROUP}" ]; then + # Fetch individual cluster details from Terraform state + pushd terraform > /dev/null + resource_group=$(terraform show -json terraform.tfstate | jq -r ' + .values.root_module.resources[] | + select(.type == "azurerm_resource_group") | .values.name') + popd > /dev/null + else + resource_group="${AZ_RESOURCE_GROUP}" + fi + + # Check if resource group is still empty after all checks + if [ -z "$resource_group" ]; then + echo "Error: Missing resource_group details. Either set AZ_RESOURCE_GROUP environment variable or ensure Terraform plan has been applied." 
+ exit 1 + fi + source terraform/cb.secret + # Generate workspaceInfo.yaml with fetched resource_group details + cat < workspaceInfo.yaml + workspaceName: "$RW_WORKSPACE" + workspaceOwnerEmail: authors@runwhen.com + defaultLocation: location-01-us-west1 + defaultLOD: detailed + cloudConfig: + azure: + subscriptionId: "$ARM_SUBSCRIPTION_ID" + tenantId: "$AZ_TENANT_ID" + clientId: "$AZ_CLIENT_ID" + clientSecret: "$AZ_CLIENT_SECRET" + resourceGroupLevelOfDetails: + $resource_group: detailed + codeCollections: + - repoURL: "$repo_url" + branch: "$branch_name" + codeBundles: ["$codebundle"] + custom: + subscription_name: $subscription_name + resource_group: $resource_group + EOF + silent: true + + run-rwl-discovery: + desc: "Run RunWhen Local Discovery on test infrastructure" + cmds: + - | + source terraform/cb.secret + CONTAINER_NAME="RunWhenLocal" + if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Stopping and removing existing container $CONTAINER_NAME..." + docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME + elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Removing existing stopped container $CONTAINER_NAME..." + docker rm $CONTAINER_NAME + else + echo "No existing container named $CONTAINER_NAME found." + fi + + echo "Cleaning up output directory..." + sudo rm -rf output || { echo "Failed to remove output directory"; exit 1; } + mkdir output && chmod 777 output || { echo "Failed to set permissions"; exit 1; } + + echo "Starting new container $CONTAINER_NAME..." + + docker run --name $CONTAINER_NAME -p 8081:8081 -v "$(pwd)":/shared -d ghcr.io/runwhen-contrib/runwhen-local:latest || { + echo "Failed to start container"; exit 1; + } + + echo "Running workspace builder script in container..." 
+ docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { + echo "Error executing script in container"; exit 1; + } + + echo "Review generated config files under output/workspaces/" + silent: true + + validate-generation-rules: + desc: "Validate YAML files in .runwhen/generation-rules" + cmds: + - | + for cmd in curl yq ajv; do + if ! command -v $cmd &> /dev/null; then + echo "Error: $cmd is required but not installed." + exit 1 + fi + done + + temp_dir=$(mktemp -d) + curl -s -o "$temp_dir/generation-rule-schema.json" https://raw.githubusercontent.com/runwhen-contrib/runwhen-local/refs/heads/main/src/generation-rule-schema.json + + for yaml_file in ../.runwhen/generation-rules/*.yaml; do + echo "Validating $yaml_file" + json_file="$temp_dir/$(basename "${yaml_file%.*}.json")" + yq -o=json "$yaml_file" > "$json_file" + ajv validate -s "$temp_dir/generation-rule-schema.json" -d "$json_file" --spec=draft2020 --strict=false \ + && echo "$yaml_file is valid." || echo "$yaml_file is invalid." + done + + rm -rf "$temp_dir" + silent: true + + check-rwp-config: + desc: Check if env vars are set for RunWhen Platform + cmds: + - | + source terraform/tf.secret + missing_vars=() + + if [ -z "$RW_WORKSPACE" ]; then + missing_vars+=("RW_WORKSPACE") + fi + + if [ -z "$RW_API_URL" ]; then + missing_vars+=("RW_API_URL") + fi + + if [ -z "$RW_PAT" ]; then + missing_vars+=("RW_PAT") + fi + + if [ ${#missing_vars[@]} -ne 0 ]; then + echo "The following required environment variables are missing: ${missing_vars[*]}" + exit 1 + fi + silent: true + + upload-slxs: + desc: "Upload SLX files to the appropriate URL" + env: + RW_WORKSPACE: "{{.RW_WORKSPACE}}" + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/cb.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Upload aborted." 
+ exit 1 + fi + + # Create Secrets + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" + PAYLOAD="{\"secrets\": {\"az_subscriptionId\": \"${ARM_SUBSCRIPTION_ID}\", \"az_clientId\": \"${AZ_CLIENT_ID}\", \"az_tenantId\": \"${AZ_TENANT_ID}\", \"az_clientSecret\": \"${AZ_CLIENT_SECRET}\"}}" + echo "Uploading secrets to $URL" + response_code=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" \ + -w "%{http_code}" -o /dev/null -s) + if [[ "$response_code" == "200" || "$response_code" == "201" ]]; then + echo "Successfully uploaded secrets to $URL" + else + echo "Failed to upload secrets: $SLX_NAME to $URL. Unexpected response code: $response_code" + fi + + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + PAYLOAD=$(jq -n --arg commitMsg "Creating new SLX $SLX_NAME" '{ commitMsg: $commitMsg, files: {} }') + for file in slx.yaml runbook.yaml sli.yaml; do + if [ -f "$dir/$file" ]; then + CONTENT=$(cat "$dir/$file") + PAYLOAD=$(echo "$PAYLOAD" | jq --arg fileContent "$CONTENT" --arg fileName "$file" '.files[$fileName] = $fileContent') + fi + done + + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Uploading SLX: $SLX_NAME to $URL" + + response=$(curl -v -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" -w "%{http_code}" -o /dev/null -s 2>&1) + + if [[ "$response" =~ 200|201 ]]; then + echo "Successfully uploaded SLX: $SLX_NAME to $URL" + else + echo "Failed to upload SLX: $SLX_NAME to $URL. 
Response:" + echo "$response" + fi + fi + done + silent: true + delete-slxs: + desc: "Delete SLX objects from the appropriate URL" + env: + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/cb.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Deletion aborted." + exit 1 + fi + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Deleting SLX: $SLX_NAME from $URL" + response=$(curl -v -X DELETE "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" -w "%{http_code}" -o /dev/null -s 2>&1) + + if [[ "$response" =~ 200|204 ]]; then + echo "Successfully deleted SLX: $SLX_NAME from $URL" + else + echo "Failed to delete SLX: $SLX_NAME from $URL. Response:" + echo "$response" + fi + fi + done + silent: true + + check-terraform-infra: + desc: "Check if Terraform has any deployed infrastructure in the terraform subdirectory" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ ! -d "terraform" ]; then + echo "Terraform directory not found." + exit 1 + fi + cd terraform + + # Check if Terraform state file exists + if [ ! -f "terraform.tfstate" ]; then + echo "No Terraform state file found in the terraform directory. No infrastructure is deployed." 
+ exit 0 + fi + + # List resources in Terraform state + resources=$(terraform state list) + + # Check if any resources are listed in the state file + if [ -n "$resources" ]; then + echo "Deployed infrastructure detected." + echo "$resources" + exit 0 + else + echo "No deployed infrastructure found in Terraform state." + exit 0 + fi + silent: true + + build-terraform-infra: + desc: "Run terraform apply" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Terraform apply aborted." + exit 1 + fi + task format-and-init-terraform + echo "Starting Terraform Build of Terraform infrastructure..." + terraform apply -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure build completed." + silent: true + + cleanup-terraform-infra: + desc: "Cleanup deployed Terraform infrastructure" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Set Additional env + export TF_VAR_sp_principal_id=$(az ad sp show --id $AZ_CLIENT_ID --query id -o tsv) + export TF_VAR_subscription_id=$ARM_SUBSCRIPTION_ID + export TF_VAR_tenant_id=$AZ_TENANT_ID + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Cleanup aborted." + exit 1 + fi + + echo "Starting cleanup of Terraform infrastructure..." + terraform destroy -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure cleanup completed." 
+ silent: true + + check-and-cleanup-terraform: + desc: "Check and clean up deployed Terraform infrastructure if it exists" + cmds: + - | + # Capture the output of check-terraform-infra + infra_output=$(task check-terraform-infra | tee /dev/tty) + + # Check if output contains indication of deployed infrastructure + if echo "$infra_output" | grep -q "Deployed infrastructure detected"; then + echo "Infrastructure detected; proceeding with cleanup." + task cleanup-terraform-infra + else + echo "No deployed infrastructure found; no cleanup required." + fi + silent: true + + clean-rwl-discovery: + desc: "Check and clean up RunWhen Local discovery output" + cmds: + - | + sudo rm -rf output + rm workspaceInfo.yaml + silent: true From 6fb2b4220b4c732c75d4ba466d679f9f85dfe428 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 30 Jul 2025 13:22:40 +0530 Subject: [PATCH 03/18] add root README --- .../azure-planned-maintenance/README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 codebundles/azure-planned-maintenance/README.md diff --git a/codebundles/azure-planned-maintenance/README.md b/codebundles/azure-planned-maintenance/README.md new file mode 100644 index 000000000..6c6d1085a --- /dev/null +++ b/codebundles/azure-planned-maintenance/README.md @@ -0,0 +1,19 @@ +# Azure Planned Maintenance +This codebundle runs a suite of metrics checks for planned maintenance events in Azure. It identifies: +- Check for planned maintenance events + +## Configuration + +The TaskSet requires initialization to import necessary secrets, services, and user variables. The following variables should be set: + +- `AZ_USERNAME`: Service principal's client ID +- `AZ_SECRET_VALUE`: The credential secret value from the app registration +- `AZ_TENANT`: The Azure tenancy ID +- `AZ_SUBSCRIPTION`: The Azure subscription ID + +## Testing +See the .test directory for infrastructure test code. 
+ +## Notes + +This codebundle assumes the service principal authentication flow \ No newline at end of file From afc4d87c7bb261fd56e388e12cd3aa3fe21809db Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 30 Jul 2025 13:26:02 +0530 Subject: [PATCH 04/18] add rw template --- .../azure-planned-maintenance.yaml | 23 ++++++++ .../azure-planned-maintenance-sli.yaml | 55 +++++++++++++++++++ .../azure-planned-maintenance-slx.yaml | 27 +++++++++ .../azure-planned-maintenance-taskset.yaml | 37 +++++++++++++ 4 files changed, 142 insertions(+) create mode 100644 codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml create mode 100644 codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-sli.yaml create mode 100644 codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml create mode 100644 codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-taskset.yaml diff --git a/codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml b/codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml new file mode 100644 index 000000000..baefb7fc9 --- /dev/null +++ b/codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml @@ -0,0 +1,23 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + platform: azure + generationRules: + - resourceTypes: + - resource_group + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: + - baseName: azure-planned-maintenance + qualifiers: ["subscription_id"] + baseTemplateName: azure-planned-maintenance + levelOfDetail: basic + outputItems: + - type: slx + - type: sli + - type: runbook + templateName: azure-planned-maintenance-taskset.yaml + - type: workflow diff --git a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-sli.yaml 
b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-sli.yaml new file mode 100644 index 000000000..b0f003d61 --- /dev/null +++ b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-sli.yaml @@ -0,0 +1,55 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: OK + displayUnitsShort: ok + locations: + - {{default_location}} + description: Check for planned maintenance events in Azure subscription {{ match_resource.subscription_name }}. + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/azure-c7n-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-planned-maintenance/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 600 + configProvided: + - name: AZURE_RESOURCE_GROUP + value: "{{resource_group.name}}" + - name: AZURE_SUBSCRIPTION_NAME + value: "{{ subscription_name }}" + - name: AZURE_SUBSCRIPTION_ID + value: "{{ subscription_id }}" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} + alerts: + warning: + operator: < + threshold: '1' + for: '20m' + ticket: + operator: < + threshold: '1' + for: '40m' + page: + operator: '==' + threshold: '0' + for: '' \ No newline at end of file diff --git a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml new file mode 100644 index 000000000..a1fcb17ad --- /dev/null +++ b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml @@ -0,0 +1,27 @@ 
+apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/azure/storage/10086-icon-service-Storage-Accounts.svg + alias: {{ match_resource.resource_group.name }} Azure Planned Maintenance + asMeasuredBy: Composite health score of resources & activities. + configProvided: + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER + owners: + - {{workspace.owner_email}} + statement: Check for planned maintenance events on Azure resources. + additionalContext: + {% include "azure-hierarchy.yaml" ignore missing %} + qualified_name: "{{ match_resource.qualified_name }}" + tags: + {% include "azure-tags.yaml" ignore missing %} + - name: service + value: resourcegraph + - name: access + value: read-only \ No newline at end of file diff --git a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-taskset.yaml b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-taskset.yaml new file mode 100644 index 000000000..99d99699b --- /dev/null +++ b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-taskset.yaml @@ -0,0 +1,37 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + description: List planned maintenance events in azure subscription {{ match_resource.subscription_name }}. 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/azure-c7n-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/azure-planned-maintenance/runbook.robot + configProvided: + - name: AZURE_RESOURCE_GROUP + value: "{{resource_group.name}}" + - name: AZURE_SUBSCRIPTION_NAME + value: "{{ subscription_name }}" + - name: AZURE_SUBSCRIPTION_ID + value: "{{ subscription_id }}" + secretsProvided: + {% if wb_version %} + {% include "azure-auth.yaml" ignore missing %} + {% else %} + - name: azure_credentials + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} From 17b28c2b469883c82d94e2b0b49f3c1a06d550bd Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 4 Aug 2025 05:16:57 +0530 Subject: [PATCH 05/18] OUTPUT_DIR variable from maintenance-event.sh --- .../azure-planned-maintenance/maintenance-event.sh | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/codebundles/azure-planned-maintenance/maintenance-event.sh b/codebundles/azure-planned-maintenance/maintenance-event.sh index 5ec335ee2..cc0017f08 100755 --- a/codebundles/azure-planned-maintenance/maintenance-event.sh +++ b/codebundles/azure-planned-maintenance/maintenance-event.sh @@ -2,16 +2,15 @@ set -euo pipefail # ----------------------------------------------------------------------------- -# Script: get_maintenance_events.sh +# Script: maintenance-event.sh # Purpose: Fetches planned maintenance events from Azure Service Health and # their impacted resources for the specified subscription. # # Inputs (Environment Variables): # AZURE_SUBSCRIPTION_ID (Required): Azure Subscription ID. -# OUTPUT_DIR (Optional): Directory to save the output JSON file. # # Outputs: -# File: ${OUTPUT_DIR}/maintenance_events.json +# File: maintenance_events.json # Contains an array of Azure Planned Maintenance events with parsed impact data. 
# ----------------------------------------------------------------------------- @@ -28,16 +27,11 @@ fi echo "Switching to subscription ID: $subscription" az account set --subscription "$subscription" || { echo "Failed to set subscription."; exit 1; } -OUTPUT_DIR="${OUTPUT_DIR:-.}" -output_file="${OUTPUT_DIR}/maintenance_events.json" -temp_file="${OUTPUT_DIR}/temp_events.json" - -# Ensure output directory exists -mkdir -p "$OUTPUT_DIR" +output_file="maintenance_events.json" +temp_file="temp_events.json" echo "--- Starting Azure Planned Maintenance Event Retrieval ---" echo "Subscription ID: $subscription" -echo "Output Directory: $OUTPUT_DIR" echo "Output File: $output_file" # Check for required Azure CLI extensions From 078eb955b5681dd4c94e3dfaed3bfc2a438d151c Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 4 Aug 2025 05:19:58 +0530 Subject: [PATCH 06/18] add script to fetche az service issue events --- .../service-issue-events.sh | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100755 codebundles/azure-planned-maintenance/service-issue-events.sh diff --git a/codebundles/azure-planned-maintenance/service-issue-events.sh b/codebundles/azure-planned-maintenance/service-issue-events.sh new file mode 100755 index 000000000..3a7b821eb --- /dev/null +++ b/codebundles/azure-planned-maintenance/service-issue-events.sh @@ -0,0 +1,153 @@ +#!/bin/bash +set -euo pipefail + +# ----------------------------------------------------------------------------- +# Script: service-issue-events.sh +# Purpose: Fetches service issue events from Azure Service Health and +# their impacted resources for the specified subscription. +# +# Inputs (Environment Variables): +# AZURE_SUBSCRIPTION_ID (Required): Azure Subscription ID. +# +# Outputs: +# File: service_issue_events.json +# Contains an array of Azure Service Issue events with parsed impact data. 
+# -----------------------------------------------------------------------------
+
+# Resolve the target subscription: AZURE_SUBSCRIPTION_ID when set and non-empty, else the CLI's current subscription.
+if [ -z "${AZURE_SUBSCRIPTION_ID:-}" ]; then  # ':-' keeps 'set -u' from aborting when the variable is unset
+    subscription=$(az account show --query "id" -o tsv)
+    echo "AZURE_SUBSCRIPTION_ID is not set. Using current subscription ID: $subscription"
+else
+    subscription="$AZURE_SUBSCRIPTION_ID"
+    echo "Using specified subscription ID: $subscription"
+fi
+
+# Make the resolved subscription the active az CLI context; stop if that fails.
+echo "Switching to subscription ID: $subscription"
+az account set --subscription "$subscription" || { echo "Failed to set subscription." >&2; exit 1; }
+
+
+output_file="service_issue_events.json"
+temp_file="temp_service_issue_events.json"  # only removed during clean-up at the end of the script
+
+echo "--- Starting Azure Service Issue Event Retrieval ---"
+echo "Subscription ID: $subscription"
+echo "Output File: $output_file"
+
+# check_extension NAME: install the Azure CLI extension NAME unless 'az extension show' already finds it.
+check_extension() {
+    local extension=$1
+    echo "Checking for '$extension' Azure CLI extension..."
+    if ! az extension show --name "$extension" &>/dev/null; then
+        echo "Installing '$extension' extension..."
+        az extension add --name "$extension" --yes || {
+            echo "ERROR: Failed to install '$extension' Azure CLI extension." >&2
+            exit 1
+        }
+        echo "'$extension' extension installed successfully."
+    else
+        echo "'$extension' extension is already installed."
+    fi
+}
+
+# Extensions this script checks for before querying ('az graph query' is used further down).
+check_extension "resource-graph"
+check_extension "account"
+
+# parse_impact_data JSON_TEXT: print JSON_TEXT re-emitted by jq, or '[]' when jq cannot parse it.
+parse_impact_data() {
+    local impact_json=$1
+    # jq errors are silenced; the '[]' fallback lets the caller always pass the result to --argjson.
+    echo "$impact_json" | jq -r '.' 2>/dev/null || echo '[]'
+}
+
+# Query for service issue events
+echo "Fetching service issue events from Azure..."
+query="
+ServiceHealthResources
+| where type =~ 'microsoft.resourcehealth/events'
+| extend
+    eventType = tostring(properties.EventType),
+    status = tostring(properties.Status),
+    description = tostring(properties.Title),
+    trackingId = tostring(properties.TrackingId),
+    summary = tostring(properties.Summary),
+    level = tostring(properties.Level),
+    impact = properties.Impact,
+    impactStartTime = todatetime(properties.ImpactStartTime),
+    impactMitigationTime = todatetime(properties.ImpactMitigationTime)
+| where eventType == 'ServiceIssue'
+| project
+    subscriptionId,
+    trackingId,
+    eventType,
+    status,
+    summary,
+    description,
+    level,
+    impactStartTime,
+    impactMitigationTime,
+    id,
+    impact
+| order by impactStartTime asc
+"
+
+echo "Executing query to get service issue events..."
+if ! events_result=$(az graph query -q "$query" --subscriptions "$subscription" -o json 2>/dev/null); then
+    echo "ERROR: Failed to retrieve service issue events from Azure." >&2
+    echo "[]" > "$output_file"  # leave a valid, empty JSON array behind even on failure
+    exit 1
+fi
+
+# Reshape each returned row: keep the scalar fields and attach the parsed impact as 'impactDetails'.
+echo "Processing results..."
+processed_events=()
+count=$(echo "$events_result" | jq -r '.data | length' 2>/dev/null || echo "0")  # 0 when the response cannot be parsed
+
+echo "Found $count service issue events."
+
+for ((i=0; i<count; i++)); do
+    event=$(echo "$events_result" | jq ".data[$i]" 2>/dev/null)
+
+    # Extract basic event info
+    base_event=$(echo "$event" | jq '{
+        subscriptionId,
+        trackingId,
+        eventType,
+        status,
+        summary,
+        description,
+        level,
+        impactStartTime,
+        impactMitigationTime,
+        id
+    }')
+
+    # Process impact data
+    impact_json=$(echo "$event" | jq -r '.impact' 2>/dev/null)
+    impact_details=$(parse_impact_data "$impact_json" 2>/dev/null)
+
+    # Combine base event with parsed impact
+    processed_event=$(echo "$base_event" | jq --argjson impact "$impact_details" '
+        . + {
+            impactDetails: $impact
+        }
+    ')
+
+    processed_events+=("$processed_event")
+done
+
+# Combine all events into a JSON array; the '+' guard keeps an empty array from tripping 'set -u' on bash older than 4.4.
+result_json=$(printf '%s\n' ${processed_events[@]+"${processed_events[@]}"} | jq -s '.')
+
+# Save to output file
+echo "$result_json" > "$output_file"
+
+# Clean up
+rm -f "$temp_file" 2>/dev/null || true
+
+echo "Results saved to $output_file"
+echo "--- Azure Service Issue Event Retrieval Finished ---"
+
+exit 0
\ No newline at end of file
From cdcbd0679db744990896d1e4c46ebaef4d0aeccd Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 4 Aug 2025 09:40:23 +0530 Subject: [PATCH 07/18] Filter active planned maintenance events in maintenance-event.sh --- codebundles/azure-planned-maintenance/maintenance-event.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/codebundles/azure-planned-maintenance/maintenance-event.sh b/codebundles/azure-planned-maintenance/maintenance-event.sh index cc0017f08..df86ccc3d 100755 --- a/codebundles/azure-planned-maintenance/maintenance-event.sh +++ b/codebundles/azure-planned-maintenance/maintenance-event.sh @@ -77,6 +77,7 @@ ServiceHealthResources impactStartTime = todatetime(properties.ImpactStartTime), impactMitigationTime = todatetime(properties.ImpactMitigationTime) | where eventType == 'PlannedMaintenance' +| where status == 'Active' | project subscriptionId, trackingId, From 33366806c31c01ec37eecd5d7f01b0d86f2ce080 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 4 Aug 2025 09:44:08 +0530 Subject: [PATCH 08/18] rename service issue script and add status filter --- .../{service-issue-events.sh => service-issue-event.sh} | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) rename codebundles/azure-planned-maintenance/{service-issue-events.sh => service-issue-event.sh} (98%) diff --git a/codebundles/azure-planned-maintenance/service-issue-events.sh b/codebundles/azure-planned-maintenance/service-issue-event.sh similarity index 98% rename from codebundles/azure-planned-maintenance/service-issue-events.sh rename to 
codebundles/azure-planned-maintenance/service-issue-event.sh index 3a7b821eb..440b9103b 100755 --- a/codebundles/azure-planned-maintenance/service-issue-events.sh +++ b/codebundles/azure-planned-maintenance/service-issue-event.sh @@ -2,7 +2,7 @@ set -euo pipefail # ----------------------------------------------------------------------------- -# Script: service-issue-events.sh +# Script: service-issue-event.sh # Purpose: Fetches service issue events from Azure Service Health and # their impacted resources for the specified subscription. # @@ -78,6 +78,7 @@ ServiceHealthResources impactStartTime = todatetime(properties.ImpactStartTime), impactMitigationTime = todatetime(properties.ImpactMitigationTime) | where eventType == 'ServiceIssue' +| where status == 'Active' | project subscriptionId, trackingId, From 71fa8371b9a1f3d7a1546933d15fa1ac4938a06d Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 4 Aug 2025 10:33:03 +0530 Subject: [PATCH 09/18] Add Azure planned maintenance and service issue event retrieval runbook --- .../azure-planned-maintenance/runbook.robot | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 codebundles/azure-planned-maintenance/runbook.robot diff --git a/codebundles/azure-planned-maintenance/runbook.robot b/codebundles/azure-planned-maintenance/runbook.robot new file mode 100644 index 000000000..4ca46c388 --- /dev/null +++ b/codebundles/azure-planned-maintenance/runbook.robot @@ -0,0 +1,131 @@ +*** Settings *** +Documentation Check Azure planned maintenance events +Metadata Author saurabh3460 +Metadata Display Name Azure Planned Maintenance +Metadata Supports Azure Planned Maintenance +Force Tags Azure Planned Maintenance + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform +Library OperatingSystem +Library Collections +Library DateTime +Suite Setup Suite Initialization + + +*** Tasks *** +List Azure Planned Maintenance Events + [Documentation] List Azure planned maintenance 
events for the subscription + [Tags] Maintenance Azure access:read-only + # Run the script to fetch maintenance events + ${maintenance_cmd}= RW.CLI.Run Bash File + ... bash_file=maintenance-event.sh + ... env=${env} + ... timeout_seconds=300 + ... include_in_history=false + + # Read the output file + ${report_data}= RW.CLI.Run Cli + ... cmd=cat maintenance_events.json + + TRY + ${event_list}= Evaluate json.loads(r'''${report_data.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${event_list}= Create List + END + + IF $event_list + # Format the results for the report + ${formatted_results}= RW.CLI.Run Cli + ... cmd=jq -r '["TrackingId", "EventType", "Status", "Level", "ImpactStartTime", "ImpactMitigationTime", "Description"], (.[] | [ .trackingId, .eventType, .status, .level, .impactStartTime, .impactMitigationTime, (.description | gsub("\\n"; " ") | gsub("\\r"; "")) ]) | @tsv' maintenance_events.json | column -t -s $'\t' + RW.Core.Add Pre To Report Azure Planned Maintenance Events Summary:\n========================================\n${formatted_results.stdout} + ${pretty_events}= Evaluate pprint.pformat(${event_list}) modules=pprint + # Raise a single issue for all events + ${event_count}= Get Length ${event_list} + RW.Core.Add Issue + ... severity=4 + ... expected=No planned maintenance events should impact resources + ... actual=Found ${event_count} planned maintenance event(s) + ... title=Azure Planned Maintenance Events detected in subscription `${AZURE_SUBSCRIPTION_NAME}` + ... details={"maintenance_events": ${pretty_events}, "subscription_name": "${AZURE_SUBSCRIPTION_NAME}"} + ... 
next_steps=Review the azure planned maintenance events in subscription `${AZURE_SUBSCRIPTION_NAME}` + ELSE + RW.Core.Add Pre To Report "No planned maintenance events found in the subscription `${AZURE_SUBSCRIPTION_NAME}`" + END + +List Azure Service Issue Events + [Documentation] List Azure service issue events for the subscription + [Tags] Maintenance Azure access:read-only + # Run the script to fetch maintenance events + ${maintenance_cmd}= RW.CLI.Run Bash File + ... bash_file=service-issue-event.sh + ... env=${env} + ... timeout_seconds=300 + ... include_in_history=false + + # Read the output file + ${report_data}= RW.CLI.Run Cli + ... cmd=cat service_issue_events.json + + TRY + ${event_list}= Evaluate json.loads(r'''${report_data.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${event_list}= Create List + END + + IF $event_list + # Format the results for the report + ${formatted_results}= RW.CLI.Run Cli + ... cmd=jq -r '["TrackingId", "EventType", "Status", "Level", "ImpactStartTime", "ImpactMitigationTime", "Description"], (.[] | [ .trackingId, .eventType, .status, .level, .impactStartTime, .impactMitigationTime, (.description | gsub("\\n"; " ") | gsub("\\r"; "")) ]) | @tsv' service_issue_events.json | column -t -s $'\t' + RW.Core.Add Pre To Report Azure Service Issue Events Summary:\n========================================\n${formatted_results.stdout} + ${pretty_events}= Evaluate pprint.pformat(${event_list}) modules=pprint + # Raise a single issue for all events + ${event_count}= Get Length ${event_list} + RW.Core.Add Issue + ... severity=4 + ... expected=No service issue events should impact resources + ... actual=Found ${event_count} service issue event(s) + ... title=Azure Service Issue Events detected in subscription `${AZURE_SUBSCRIPTION_NAME}` + ... details={"service_issue_events": ${pretty_events}, "subscription_name": "${AZURE_SUBSCRIPTION_NAME}"} + ... 
next_steps=Review the azure service issue events in subscription `${AZURE_SUBSCRIPTION_NAME}` + ELSE + RW.Core.Add Pre To Report "No service issue events found in the subscription `${AZURE_SUBSCRIPTION_NAME}`" + END + +*** Keywords *** +Suite Initialization + ${azure_credentials}= RW.Core.Import Secret + ... azure_credentials + ... type=string + ... description=The secret containing AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID + ... pattern=\w* + ${AZURE_RESOURCE_GROUP}= RW.Core.Import User Variable AZURE_RESOURCE_GROUP + ... type=string + ... description=Azure resource group. + ... pattern=\w* + ${AZURE_SUBSCRIPTION_ID}= RW.Core.Import User Variable AZURE_SUBSCRIPTION_ID + ... type=string + ... description=The Azure Subscription ID for the resource. + ... pattern=\w* + ... default="" + ${AZURE_SUBSCRIPTION_NAME}= RW.Core.Import User Variable AZURE_SUBSCRIPTION_NAME + ... type=string + ... description=The Azure Subscription Name for the resource. + ... pattern=\w* + ... default="" + Set Suite Variable ${AZURE_SUBSCRIPTION_ID} ${AZURE_SUBSCRIPTION_ID} + Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} + Set Suite Variable ${AZURE_SUBSCRIPTION_NAME} ${AZURE_SUBSCRIPTION_NAME} + # Set Azure subscription context for Cloud Custodian + RW.CLI.Run Cli + ... cmd=az account set --subscription ${AZURE_SUBSCRIPTION_ID} + ... include_in_history=false + + Set Suite Variable + ... ${env} + ... 
{"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_SUBSCRIPTION_ID":"${AZURE_SUBSCRIPTION_ID}"} \ No newline at end of file From 81abc8087ae2346379d03a364b91ff74807eec21 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 4 Aug 2025 11:53:29 +0530 Subject: [PATCH 10/18] Add script to fetch impacted resources from Azure Service Health --- .../impacted-resource.sh | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100755 codebundles/azure-planned-maintenance/impacted-resource.sh diff --git a/codebundles/azure-planned-maintenance/impacted-resource.sh b/codebundles/azure-planned-maintenance/impacted-resource.sh new file mode 100755 index 000000000..bc88d1cd7 --- /dev/null +++ b/codebundles/azure-planned-maintenance/impacted-resource.sh @@ -0,0 +1,89 @@ +#!/bin/bash +set -euo pipefail + +# ----------------------------------------------------------------------------- +# Script: impacted-resource.sh +# Purpose: Fetches impacted resources from Azure Service Health for the specified subscription. +# Inputs (Environment Variables): +# AZURE_SUBSCRIPTION_ID (Required): Azure Subscription ID. +# Outputs: +# File: impacted_resources.json +# Contains an array of impacted resources. +# ----------------------------------------------------------------------------- + + +# Get or set subscription ID +if [ -z "${AZURE_SUBSCRIPTION_ID:-}" ]; then + subscription=$(az account show --query "id" -o tsv) + echo "AZURE_SUBSCRIPTION_ID is not set. 
Using current subscription ID: $subscription" +else + subscription="$AZURE_SUBSCRIPTION_ID" + echo "Using specified subscription ID: $subscription" +fi + +# Set the subscription ID +echo "Switching to subscription ID: $subscription" +az account set --subscription "$subscription" || { echo "Failed to set subscription."; exit 1; } + +output_file="impacted_resources.json" +temp_file="temp_impacted_resources.json" + +# Check for required Azure CLI extensions +check_extension() { + local extension=$1 + echo "Checking for '$extension' Azure CLI extension..." + if ! az extension show --name "$extension" &>/dev/null; then + echo "Installing '$extension' extension..." + az extension add --name "$extension" --yes || { + echo "ERROR: Failed to install '$extension' Azure CLI extension." >&2 + exit 1 + } + echo "'$extension' extension installed successfully." + else + echo "'$extension' extension is already installed." + fi +} + +# Install required extensions +check_extension "resource-graph" +check_extension "account" + +# KQL Query for impacted resources +query=" +ServiceHealthResources +| where type == 'microsoft.resourcehealth/events/impactedresources' +| extend TrackingId = split(split(id, '/events/', 1)[0], '/impactedResources', 0)[0] +| extend p = parse_json(properties) +| project subscriptionId, TrackingId, resourceName=p.resourceName, resourceGroup=p.resourceGroup, resourceType=p.targetResourceType, details = p, id +" + +echo "Fetching impacted resources from Azure..." +if ! resources_result=$(az graph query -q "$query" --subscriptions "$subscription" -o json 2>/dev/null); then + echo "ERROR: Failed to retrieve impacted resources from Azure." >&2 + echo "[]" > "$output_file" + exit 1 +fi + +echo "Processing results..." +count=$(echo "$resources_result" | jq -r '.data | length' 2>/dev/null || echo "0") +echo "Found $count impacted resources." 
+ +processed_resources=() +for ((i=0; i/dev/null) + processed_resources+=("$resource") +done + +# Combine all resources into a JSON array +result_json=$(printf '%s\n' "${processed_resources[@]}" | jq -s '.') + +# Save to output file +echo "$result_json" > "$output_file" + +# Clean up +rm -f "$temp_file" 2>/dev/null || true + +echo "Results saved to $output_file" +echo "--- Azure Impacted Resource Retrieval Finished ---" + +exit 0 From f14eb12b06d6a65d44c6a7b597a8b0123b68d36e Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 4 Aug 2025 11:54:03 +0530 Subject: [PATCH 11/18] Add task to list Azure impacted resources in runbook --- .../azure-planned-maintenance/runbook.robot | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/codebundles/azure-planned-maintenance/runbook.robot b/codebundles/azure-planned-maintenance/runbook.robot index 4ca46c388..4ea308317 100644 --- a/codebundles/azure-planned-maintenance/runbook.robot +++ b/codebundles/azure-planned-maintenance/runbook.robot @@ -96,7 +96,48 @@ List Azure Service Issue Events ELSE RW.Core.Add Pre To Report "No service issue events found in the subscription `${AZURE_SUBSCRIPTION_NAME}`" END - + +List Azure Impacted Resources + [Documentation] List Azure resources impacted by planned maintenance or other events + [Tags] Maintenance Azure Impacted access:read-only + # Run the script to fetch impacted resources + ${impacted_cmd}= RW.CLI.Run Bash File + ... bash_file=impacted-resource.sh + ... env=${env} + ... timeout_seconds=300 + ... include_in_history=false + + # Read the output file + ${report_data}= RW.CLI.Run Cli + ... cmd=cat impacted_resources.json + + TRY + ${impacted_list}= Evaluate json.loads(r'''${report_data.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${impacted_list}= Create List + END + + IF $impacted_list + # Format the results for the report + ${formatted_results}= RW.CLI.Run Cli + ... 
cmd=jq -r '["ResourceName", "ResourceGroup", "ResourceType", "TrackingId", "SubscriptionId", "ResourceLink"], (.[] | [ .resourceName, .resourceGroup, .resourceType, .TrackingId, .subscriptionId, ("https://portal.azure.com/#@/resource" + .id + "/overview") ]) | @tsv' impacted_resources.json | column -t -s $'\t' + RW.Core.Add Pre To Report Azure Impacted Resources Summary:\n========================================\n${formatted_results.stdout} + + # Raise a single issue for all impacted resources + ${impacted_count}= Get Length ${impacted_list} + ${pretty_impacted}= Evaluate pprint.pformat(${impacted_list}) modules=pprint + RW.Core.Add Issue + ... severity=3 + ... expected=No Azure resources should be impacted by planned maintenance or other events + ... actual=Found ${impacted_count} impacted resource(s) + ... title=Azure Impacted Resources detected in subscription `${AZURE_SUBSCRIPTION_NAME}` + ... details={"impacted_resources": ${pretty_impacted}, "subscription_name": "${AZURE_SUBSCRIPTION_NAME}"} + ... next_steps=Review the impacted resources in subscription `${AZURE_SUBSCRIPTION_NAME}` + ELSE + RW.Core.Add Pre To Report "No impacted resources found for the subscription." 
+ END + *** Keywords *** Suite Initialization ${azure_credentials}= RW.Core.Import Secret From a40f32f8befa673beb981f60d89e26857d8f2b55 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 5 Aug 2025 11:01:17 +0530 Subject: [PATCH 12/18] Add sli --- .../azure-planned-maintenance/sli.robot | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 codebundles/azure-planned-maintenance/sli.robot diff --git a/codebundles/azure-planned-maintenance/sli.robot b/codebundles/azure-planned-maintenance/sli.robot new file mode 100644 index 000000000..dc627a171 --- /dev/null +++ b/codebundles/azure-planned-maintenance/sli.robot @@ -0,0 +1,130 @@ +*** Settings *** +Documentation Check Azure planned maintenance events, service issue events, and impacted resources +Metadata Author saurabh3460 +Metadata Display Name Azure Planned Maintenance +Metadata Supports Azure Planned Maintenance +Force Tags Azure Planned Maintenance + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + + +Suite Setup Suite Initialization +*** Tasks *** +Count Azure Planned Maintenance Events + [Documentation] Count the number of Azure planned maintenance events for the subscription + [Tags] SLI Azure Maintenance access:read-only + # Run the script to fetch maintenance events + ${maintenance_cmd}= RW.CLI.Run Bash File + ... bash_file=maintenance-event.sh + ... env=${env} + ... timeout_seconds=300 + ... include_in_history=false + + # Read the output file + ${report_data}= RW.CLI.Run Cli + ... 
cmd=cat maintenance_events.json + + TRY + ${event_list}= Evaluate json.loads(r'''${report_data.stdout}''') json + EXCEPT + ${event_list}= Create List + END + + ${maintenance_event_count}= Get Length ${event_list} + Set Global Variable ${maintenance_event_count} + + # Optional: Set a score variable (1 if no events, 0 otherwise) + ${maintenance_event_score}= Evaluate 1 if int(${maintenance_event_count}) == 0 else 0 + Set Global Variable ${maintenance_event_score} + +Count Azure Service Issue Events + [Documentation] Count the number of Azure service issue events for the subscription + [Tags] SLI Azure ServiceIssue access:read-only + # Run the script to fetch service issue events + ${service_issue_cmd}= RW.CLI.Run Bash File + ... bash_file=service-issue-event.sh + ... env=${env} + ... timeout_seconds=300 + ... include_in_history=false + + # Read the output file + ${report_data}= RW.CLI.Run Cli + ... cmd=cat service_issue_events.json + + TRY + ${event_list}= Evaluate json.loads(r'''${report_data.stdout}''') json + EXCEPT + ${event_list}= Create List + END + + ${service_issue_event_count}= Get Length ${event_list} + Set Global Variable ${service_issue_event_count} + + # Optional: Set a score variable (1 if no events, 0 otherwise) + ${service_issue_event_score}= Evaluate 1 if int(${service_issue_event_count}) == 0 else 0 + Set Global Variable ${service_issue_event_score} + + +Count Azure Impacted Resources + [Documentation] Count the number of Azure resources currently impacted by planned maintenance or other events + [Tags] SLI Azure Impacted access:read-only + # Run the script to fetch impacted resources + ${impacted_cmd}= RW.CLI.Run Bash File + ... bash_file=impacted-resource.sh + ... env=${env} + ... timeout_seconds=300 + ... include_in_history=false + + # Read the output file + ${report_data}= RW.CLI.Run Cli + ... 
cmd=cat impacted_resources.json + + TRY + ${impacted_list}= Evaluate json.loads(r'''${report_data.stdout}''') json + EXCEPT + ${impacted_list}= Create List + END + + ${impacted_resource_count}= Get Length ${impacted_list} + Set Global Variable ${impacted_resource_count} + + # Optional: Set a score variable (1 if no impacted resources, 0 otherwise) + ${impacted_resource_score}= Evaluate 1 if int(${impacted_resource_count}) == 0 else 0 + Set Global Variable ${impacted_resource_score} + +Generate Health Score + ${health_score}= Evaluate (${maintenance_event_score} + ${service_issue_event_score} + ${impacted_resource_score}) / 3 + ${health_score}= Convert to Number ${health_score} 2 + RW.Core.Push Metric ${health_score} + + +*** Keywords *** +Suite Initialization + ${azure_credentials}= RW.Core.Import Secret + ... azure_credentials + ... type=string + ... description=The secret containing AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID + ... pattern=\w* + ${AZURE_SUBSCRIPTION_ID}= RW.Core.Import User Variable AZURE_SUBSCRIPTION_ID + ... type=string + ... description=The Azure Subscription ID for the resource. + ... pattern=\w* + ... default="" + ${AZURE_RESOURCE_GROUP}= RW.Core.Import User Variable AZURE_RESOURCE_GROUP + ... type=string + ... description=Azure resource group. + ... pattern=\w* + Set Suite Variable ${AZURE_SUBSCRIPTION_ID} ${AZURE_SUBSCRIPTION_ID} + Set Suite Variable ${AZURE_RESOURCE_GROUP} ${AZURE_RESOURCE_GROUP} + Set Suite Variable + ... ${env} + ... {"AZURE_RESOURCE_GROUP":"${AZURE_RESOURCE_GROUP}", "AZURE_SUBSCRIPTION_ID":"${AZURE_SUBSCRIPTION_ID}"} + + # Set Azure subscription context for Cloud Custodian + RW.CLI.Run Cli + ... cmd=az account set --subscription ${AZURE_SUBSCRIPTION_ID} + ... 
include_in_history=false \ No newline at end of file From 6a98a9a1dd1528caf65b43a262f4b88c5d73c09f Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 5 Aug 2025 11:01:53 +0530 Subject: [PATCH 13/18] Update doc and adjust severity levels in runbook --- codebundles/azure-planned-maintenance/runbook.robot | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/codebundles/azure-planned-maintenance/runbook.robot b/codebundles/azure-planned-maintenance/runbook.robot index 4ea308317..8d75381c7 100644 --- a/codebundles/azure-planned-maintenance/runbook.robot +++ b/codebundles/azure-planned-maintenance/runbook.robot @@ -1,5 +1,5 @@ *** Settings *** -Documentation Check Azure planned maintenance events +Documentation List Azure planned maintenance events, service issue events, and impacted resources Metadata Author saurabh3460 Metadata Display Name Azure Planned Maintenance Metadata Supports Azure Planned Maintenance @@ -10,9 +10,7 @@ Library BuiltIn Library RW.Core Library RW.CLI Library RW.platform -Library OperatingSystem -Library Collections -Library DateTime + Suite Setup Suite Initialization @@ -47,7 +45,7 @@ List Azure Planned Maintenance Events # Raise a single issue for all events ${event_count}= Get Length ${event_list} RW.Core.Add Issue - ... severity=4 + ... severity=3 ... expected=No planned maintenance events should impact resources ... actual=Found ${event_count} planned maintenance event(s) ... title=Azure Planned Maintenance Events detected in subscription `${AZURE_SUBSCRIPTION_NAME}` @@ -87,7 +85,7 @@ List Azure Service Issue Events # Raise a single issue for all events ${event_count}= Get Length ${event_list} RW.Core.Add Issue - ... severity=4 + ... severity=3 ... expected=No service issue events should impact resources ... actual=Found ${event_count} service issue event(s) ... 
title=Azure Service Issue Events detected in subscription `${AZURE_SUBSCRIPTION_NAME}` From fd1cce6de5a1e59a7e475ea442b6b1e65ba85c2b Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 5 Aug 2025 11:05:11 +0530 Subject: [PATCH 14/18] Add Taskfile for testing in Azure planned maintenance cb --- .../.test/terraform/Taskfile.yaml | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 codebundles/azure-planned-maintenance/.test/terraform/Taskfile.yaml diff --git a/codebundles/azure-planned-maintenance/.test/terraform/Taskfile.yaml b/codebundles/azure-planned-maintenance/.test/terraform/Taskfile.yaml new file mode 100644 index 000000000..08e0e835d --- /dev/null +++ b/codebundles/azure-planned-maintenance/.test/terraform/Taskfile.yaml @@ -0,0 +1,69 @@ +version: '3' + +env: + TERM: screen-256color + +tasks: + default: + cmds: + - task: test + + test: + desc: Run tests. + cmds: + - task: test-terraform + + clean: + desc: Clean the environment. + cmds: + - task: clean-go + - task: clean-terraform + + clean-terraform: + desc: Clean the terraform environment (remove terraform directories and files) + cmds: + - find . -type d -name .terraform -exec rm -rf {} + + - find . -type f -name .terraform.lock.hcl -delete + + format-and-init-terraform: + desc: Run Terraform fmt and init + cmds: + - | + terraform fmt + terraform init + test-terraform: + desc: Run tests for all terraform directories. + silent: true + env: + DIRECTORIES: + sh: find . -path '*/.terraform/*' -prune -o -name '*.tf' -type f -exec dirname {} \; | sort -u + cmds: + - | + BOLD=$(tput bold) + NORM=$(tput sgr0) + + CWD=$PWD + + for d in $DIRECTORIES; do + cd $d + echo "${BOLD}$PWD:${NORM}" + if ! terraform fmt -check=true -list=false -recursive=false; then + echo " ✗ terraform fmt" && exit 1 + else + echo " √ terraform fmt" + fi + + if ! 
terraform init -backend=false -input=false -get=true -no-color > /dev/null; then + echo " ✗ terraform init" && exit 1 + else + echo " √ terraform init" + fi + + if ! terraform validate > /dev/null; then + echo " ✗ terraform validate" && exit 1 + else + echo " √ terraform validate" + fi + + cd $CWD + done \ No newline at end of file From 2c5920e45f3592c8f8642b28380ebd0e816f6db4 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 5 Aug 2025 11:10:33 +0530 Subject: [PATCH 15/18] Remove workflow type from azure-planned-maintenance.yaml generation rules --- .../.runwhen/generation-rules/azure-planned-maintenance.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml b/codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml index baefb7fc9..bae336487 100644 --- a/codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml +++ b/codebundles/azure-planned-maintenance/.runwhen/generation-rules/azure-planned-maintenance.yaml @@ -20,4 +20,3 @@ spec: - type: sli - type: runbook templateName: azure-planned-maintenance-taskset.yaml - - type: workflow From 3631282f53c2b55f5fa94504f14b15b05e871940 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 5 Aug 2025 11:22:34 +0530 Subject: [PATCH 16/18] Update image URL and alias in azure-planned-maintenance-slx.yaml template --- .../.runwhen/templates/azure-planned-maintenance-slx.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml index a1fcb17ad..f1ddfe834 100644 --- a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml +++ b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml @@ 
-7,8 +7,8 @@ metadata: annotations: {% include "common-annotations.yaml" %} spec: - imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/azure/storage/10086-icon-service-Storage-Accounts.svg - alias: {{ match_resource.resource_group.name }} Azure Planned Maintenance + imageURL: https://placeholder.svg + alias: {{ resource_group.name }} Azure Planned Maintenance asMeasuredBy: Composite health score of resources & activities. configProvided: - name: SLX_PLACEHOLDER From be0ff5eb41c63c48062e0a7e4aa7fa1afe8f4ed8 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 5 Aug 2025 11:46:42 +0530 Subject: [PATCH 17/18] wip --- .../.runwhen/templates/azure-planned-maintenance-slx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml index f1ddfe834..b0acfce3e 100644 --- a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml +++ b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml @@ -8,7 +8,7 @@ metadata: {% include "common-annotations.yaml" %} spec: imageURL: https://placeholder.svg - alias: {{ resource_group.name }} Azure Planned Maintenance + alias: Azure Planned Maintenance asMeasuredBy: Composite health score of resources & activities. 
configProvided: - name: SLX_PLACEHOLDER From 7a43fc7a256d8b82950c73dbcea84d45333a3ce7 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 5 Aug 2025 16:35:00 +0530 Subject: [PATCH 18/18] wip --- .../.runwhen/templates/azure-planned-maintenance-slx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml index b0acfce3e..2363d0e94 100644 --- a/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml +++ b/codebundles/azure-planned-maintenance/.runwhen/templates/azure-planned-maintenance-slx.yaml @@ -8,7 +8,7 @@ metadata: {% include "common-annotations.yaml" %} spec: imageURL: https://placeholder.svg - alias: Azure Planned Maintenance + alias: {{ match_resource.subscription_name }} Azure Planned Maintenance asMeasuredBy: Composite health score of resources & activities. configProvided: - name: SLX_PLACEHOLDER