From b8de32d016490c99879cac1f016cb012cc0af00b Mon Sep 17 00:00:00 2001 From: "rw-codebundle-agent[bot]" Date: Mon, 6 Apr 2026 01:19:30 +0000 Subject: [PATCH] Add k8s-postgrescluster-pgbouncer-spec CodeBundle Implements PostgresCluster PgBouncer CR audits for Crunchy PGO: fetch global settings, validate pool_mode vs policy, connection limit ordering, replica policy, and optional Prometheus max_client_conn cross-check. Adds generation rules for postgresclusters.postgres-operator.crunchydata.com, SLX/taskset templates, and Kubernetes test scaffolding. Related: issue #67 design-spec k8s-postgrescluster-pgbouncer-spec. Made-with: Cursor --- .../k8s-postgrescluster-pgbouncer-spec.yaml | 21 ++ ...8s-postgrescluster-pgbouncer-spec-slx.yaml | 25 ++ ...ostgrescluster-pgbouncer-spec-taskset.yaml | 47 +++ .../.test/Taskfile.yaml | 105 ++++++ .../.test/kubernetes/manifest.yaml | 4 + .../README.md | 55 +++ .../cross-check-crd-vs-metrics.sh | 115 +++++++ .../fetch-postgrescluster-pgbouncer.sh | 92 +++++ .../lib-pgbouncer-spec.sh | 62 ++++ .../runbook.robot | 317 ++++++++++++++++++ .../validate-connection-limits.sh | 85 +++++ .../validate-pgbouncer-replicas.sh | 86 +++++ .../validate-pool-mode.sh | 81 +++++ 13 files changed, 1095 insertions(+) create mode 100644 codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/generation-rules/k8s-postgrescluster-pgbouncer-spec.yaml create mode 100644 codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-slx.yaml create mode 100644 codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-taskset.yaml create mode 100644 codebundles/k8s-postgrescluster-pgbouncer-spec/.test/Taskfile.yaml create mode 100644 codebundles/k8s-postgrescluster-pgbouncer-spec/.test/kubernetes/manifest.yaml create mode 100644 codebundles/k8s-postgrescluster-pgbouncer-spec/README.md create mode 100755 codebundles/k8s-postgrescluster-pgbouncer-spec/cross-check-crd-vs-metrics.sh 
create mode 100755 codebundles/k8s-postgrescluster-pgbouncer-spec/fetch-postgrescluster-pgbouncer.sh create mode 100755 codebundles/k8s-postgrescluster-pgbouncer-spec/lib-pgbouncer-spec.sh create mode 100644 codebundles/k8s-postgrescluster-pgbouncer-spec/runbook.robot create mode 100755 codebundles/k8s-postgrescluster-pgbouncer-spec/validate-connection-limits.sh create mode 100755 codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pgbouncer-replicas.sh create mode 100755 codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pool-mode.sh diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/generation-rules/k8s-postgrescluster-pgbouncer-spec.yaml b/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/generation-rules/k8s-postgrescluster-pgbouncer-spec.yaml new file mode 100644 index 00000000..43c93f10 --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/generation-rules/k8s-postgrescluster-pgbouncer-spec.yaml @@ -0,0 +1,21 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + generationRules: + - resourceTypes: + - postgresclusters.postgres-operator.crunchydata.com + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: + - baseName: k8s-pgbo-spec + shortenedBaseName: k8s-pgbo-spec + qualifiers: ["resource", "namespace", "cluster"] + baseTemplateName: k8s-postgrescluster-pgbouncer-spec + levelOfDetail: detailed + outputItems: + - type: slx + - type: runbook + templateName: k8s-postgrescluster-pgbouncer-spec-taskset.yaml diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-slx.yaml b/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-slx.yaml new file mode 100644 index 00000000..b6621392 --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-slx.yaml @@ -0,0 +1,25 @@ +apiVersion: runwhen.com/v1 
+kind: ServiceLevelX +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/CrunchyDataPrimaryIcon.png + alias: {{match_resource.resource.metadata.name}} PgBouncer Spec Audit + asMeasuredBy: Declared PgBouncer pool settings and replica counts match policy. + configProvided: + - name: POSTGRESCLUSTER_NAME + value: '{{match_resource.resource.metadata.name}}' + owners: + - {{workspace.owner_email}} + statement: PgBouncer proxy settings on the PostgresCluster should match organizational pooling and HA policies. + additionalContext: + {% include "kubernetes-hierarchy.yaml" ignore missing %} + qualified_name: "{{ match_resource.qualified_name }}" + tags: + {% include "kubernetes-tags.yaml" ignore missing %} + - name: access + value: read-only diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-taskset.yaml b/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-taskset.yaml new file mode 100644 index 00000000..e1c2638a --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/.runwhen/templates/k8s-postgrescluster-pgbouncer-spec-taskset.yaml @@ -0,0 +1,47 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + description: Audits PostgresCluster PgBouncer spec (pool mode, limits, replicas) and optional Prometheus cross-check. 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/k8s-postgrescluster-pgbouncer-spec/runbook.robot + configProvided: + - name: NAMESPACE + value: "{{match_resource.resource.metadata.namespace}}" + - name: CONTEXT + value: "{{context}}" + - name: POSTGRESCLUSTER_NAME + value: "{{match_resource.resource.metadata.name}}" + - name: EXPECTED_POOL_MODE + value: "{{custom.expected_pool_mode | default('transaction')}}" + - name: MIN_PGBOUNCER_REPLICAS + value: "{{custom.min_pgbouncer_replicas | default('1')}}" + - name: PROMETHEUS_URL + value: "{{custom.prometheus_url | default('')}}" + - name: PROMETHEUS_EXTRA_LABELS + value: "{{custom.prometheus_extra_labels | default('')}}" + - name: KUBERNETES_DISTRIBUTION_BINARY + value: "{{custom.kubernetes_distribution_binary | default('kubectl')}}" + secretsProvided: + {% if wb_version %} + {% include "kubernetes-auth.yaml" ignore missing %} + {% else %} + - name: kubeconfig + workspaceKey: {{custom.kubeconfig_secret_name | default("kubeconfig")}} + {% endif %} diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/.test/Taskfile.yaml b/codebundles/k8s-postgrescluster-pgbouncer-spec/.test/Taskfile.yaml new file mode 100644 index 00000000..92934791 --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/.test/Taskfile.yaml @@ -0,0 +1,105 @@ +version: "3" + +tasks: + default: + desc: "Run/refresh config" + cmds: + - task: check-unpushed-commits + - task: generate-rwl-config + - task: run-rwl-discovery + + clean: + desc: "Run cleanup tasks" + cmds: + - task: remove-kubernetes-objects + - task: clean-rwl-discovery + + build-infra: + desc: "Build test infrastructure" + cmds: + - task: create-kubernetes-objects + + create-kubernetes-objects: + desc: "Apply manifests from kubernetes directory using kubectl" + cmds: + - 
kubectl apply -f kubernetes/* + silent: true + + remove-kubernetes-objects: + desc: "Delete kubernetes objects" + cmds: + - kubectl delete -f kubernetes/* --ignore-not-found=true + silent: true + + check-unpushed-commits: + desc: Check for uncommitted/unpushed changes + vars: + BASE_DIR: "../" + cmds: + - | + UNCOMMITTED=$(git diff --name-only HEAD | grep -E "^${BASE_DIR}" | grep -v "/\.test/" || true) + if [ -n "$UNCOMMITTED" ]; then + echo "Uncommitted changes found. Commit and push before testing." + exit 1 + fi + silent: true + + generate-rwl-config: + desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" + env: + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + cmds: + - | + repo_url=$(git config --get remote.origin.url) + branch_name=$(git rev-parse --abbrev-ref HEAD) + codebundle=$(basename "$(dirname "$PWD")") + namespace=$(yq e 'select(.kind == "Namespace") | .metadata.name' kubernetes/manifest.yaml -N) + cat < workspaceInfo.yaml + workspaceName: "$RW_WORKSPACE" + workspaceOwnerEmail: authors@runwhen.com + defaultLocation: location-01 + defaultLOD: none + cloudConfig: + kubernetes: + kubeconfigFile: /shared/kubeconfig + namespaceLODs: + $namespace: detailed + namespaces: + - $namespace + codeCollections: + - repoURL: "$repo_url" + branch: "$branch_name" + codeBundles: ["$codebundle"] + custom: + kubeconfig_secret_name: "kubeconfig" + kubernetes_distribution_binary: kubectl + expected_pool_mode: "transaction" + min_pgbouncer_replicas: "1" + EOF + silent: true + + run-rwl-discovery: + desc: "Run RunWhen Local Discovery on test infrastructure" + cmds: + - | + CONTAINER_NAME="RunWhenLocal" + if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then + docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME + elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then + docker rm $CONTAINER_NAME + fi + sudo rm -rf output || true + mkdir -p output && chmod 777 output || true + kubeconfig=$(echo "$RW_FROM_FILE" | jq -r 
.kubeconfig) + docker run --name $CONTAINER_NAME -p 8081:8081 \ + -v "$(pwd)":/shared \ + -v "$kubeconfig":/shared/kubeconfig \ + -d ghcr.io/runwhen-contrib/runwhen-local:latest || { echo "Failed to start container"; exit 1; } + docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { echo "Discovery failed"; exit 1; } + silent: true + + clean-rwl-discovery: + desc: "Remove discovery output" + cmds: + - rm -rf output workspaceInfo.yaml + silent: true diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/.test/kubernetes/manifest.yaml b/codebundles/k8s-postgrescluster-pgbouncer-spec/.test/kubernetes/manifest.yaml new file mode 100644 index 00000000..79a0d7b3 --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/.test/kubernetes/manifest.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test-pgo-pgbouncer-spec diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/README.md b/codebundles/k8s-postgrescluster-pgbouncer-spec/README.md new file mode 100644 index 00000000..1f8bf566 --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/README.md @@ -0,0 +1,55 @@ +# Kubernetes PostgresCluster PgBouncer Spec Audit + +This CodeBundle validates Crunchy Postgres Operator (PGO) `PostgresCluster` custom resources for the PgBouncer proxy: `spec.proxy.pgBouncer.config.global` keys such as `pool_mode`, `default_pool_size`, `max_client_conn`, and `max_db_connections`, plus replica expectations. It complements runtime Prometheus health checks by auditing declared GitOps configuration. + +## Overview + +- **Fetch**: Reads each targeted `PostgresCluster` and summarizes PgBouncer global settings; reports when the proxy block is missing or the CR cannot be read. +- **Pool mode**: Compares `pool_mode` to `EXPECTED_POOL_MODE` (transaction, session, or statement). +- **Connection limits**: Flags inconsistent numeric combinations (for example `max_db_connections` below `default_pool_size`). 
+- **Replicas**: Compares desired and ready PgBouncer replicas to `MIN_PGBOUNCER_REPLICAS`. +- **Prometheus (optional)**: When `PROMETHEUS_URL` is set, compares CR `max_client_conn` to `pgbouncer_config_max_client_connections` samples. + +PGO stores PgBouncer options under `spec.proxy.pgBouncer.config.global` as `pgbouncer.ini`-style keys (underscore names). Confirm field paths with `kubectl explain postgrescluster.spec.proxy.pgBouncer` on your operator version. + +## Configuration + +### Required Variables + +- `CONTEXT`: Kubernetes context name. +- `NAMESPACE`: Namespace containing the `PostgresCluster`. +- `POSTGRESCLUSTER_NAME`: Name of the `PostgresCluster` CR, or `All` to evaluate every `PostgresCluster` in the namespace. +- `EXPECTED_POOL_MODE`: Expected `pool_mode` string (`transaction`, `session`, or `statement`). + +### Optional Variables + +- `MIN_PGBOUNCER_REPLICAS`: Minimum acceptable PgBouncer replicas for policy (default: `1`). +- `PROMETHEUS_URL`: Base URL for Prometheus (for example `http://prometheus:9090`). Leave empty to skip the cross-check task. +- `PROMETHEUS_EXTRA_LABELS`: Extra PromQL label selectors appended inside the `pgbouncer_config_max_client_connections` selector (for example `pod=~"hippo.*"`). Optional. +- `KUBERNETES_DISTRIBUTION_BINARY`: CLI binary (default: `kubectl`). + +### Secrets + +- `kubeconfig`: Kubernetes credentials with `get`/`list` on `postgresclusters.postgres-operator.crunchydata.com` and related workloads. Format: kubeconfig YAML. + +## Tasks Overview + +### Fetch PostgresCluster PgBouncer Configuration + +Loads the CR and prints PgBouncer global settings. Raises issues when the cluster cannot be read, when `POSTGRESCLUSTER_NAME=All` finds no clusters, or when `spec.proxy.pgBouncer` is absent. + +### Validate Pool Mode Matches Expected + +Compares configured `pool_mode` to `EXPECTED_POOL_MODE` for ORM-appropriate pooling. 
+ +### Validate Connection Limit Consistency + +Checks relationships between `default_pool_size`, `max_client_conn`, and `max_db_connections` and flags impossible or risky combinations. + +### Check PgBouncer Replica Count vs Policy + +Compares `spec.proxy.pgBouncer.replicas` and `status.proxy.pgBouncer.readyReplicas` to `MIN_PGBOUNCER_REPLICAS`. + +### Optional Cross-Check CRD Limits with Live Prometheus Samples + +When `PROMETHEUS_URL` is set, runs an instant query for `pgbouncer_config_max_client_connections` in the namespace and compares it to the CR `max_client_conn`. diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/cross-check-crd-vs-metrics.sh b/codebundles/k8s-postgrescluster-pgbouncer-spec/cross-check-crd-vs-metrics.sh new file mode 100755 index 00000000..cf35780e --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/cross-check-crd-vs-metrics.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# ----------------------------------------------------------------------------- +# Optional: PROMETHEUS_URL — compares CR max_client_conn to Prometheus sample. +# OUTPUT: cross_check_issues.json +# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=lib-pgbouncer-spec.sh +source "${SCRIPT_DIR}/lib-pgbouncer-spec.sh" + +: "${CONTEXT:?Must set CONTEXT}" +: "${NAMESPACE:?Must set NAMESPACE}" +: "${POSTGRESCLUSTER_NAME:?Must set POSTGRESCLUSTER_NAME}" + +OUTPUT_FILE="cross_check_issues.json" +issues_json='[]' + +append_issue() { + local title="$1" details="$2" severity="$3" next_steps="$4" + issues_json=$(echo "$issues_json" | jq \ + --arg title "$title" \ + --arg details "$details" \ + --argjson severity "$severity" \ + --arg next_steps "$next_steps" \ + '. 
+= [{ + "title": $title, + "details": $details, + "severity": $severity, + "next_steps": $next_steps + }]') +} + +PROM_URL="${PROMETHEUS_URL:-}" + +if [ -z "$PROM_URL" ] || [ "$PROM_URL" = "disabled" ]; then + echo '[]' > "$OUTPUT_FILE" + echo "Cross-check skipped (PROMETHEUS_URL not set)." + exit 0 +fi + +# Trim trailing slash +PROM_URL="${PROM_URL%/}" + +while IFS= read -r cluster_name; do + [ -z "$cluster_name" ] && continue + + if ! raw_json="$(fetch_cluster_json "$cluster_name")" || [ -z "$raw_json" ]; then + append_issue \ + "Cross-check skipped (no CR) for \`${cluster_name}\`" \ + "Could not load PostgresCluster JSON." \ + 2 \ + "Fix kubectl access before relying on Prometheus cross-check." + continue + fi + + if [ "$(echo "$raw_json" | jq 'if (.spec.proxy.pgBouncer != null) then true else false end')" != "true" ]; then + continue + fi + + cr_max="$(numeric_or_empty "$(global_setting_alt "$raw_json" "max_client_conn")")" + if [ -z "$cr_max" ]; then + append_issue \ + "Cross-check skipped for \`${cluster_name}\`" \ + "max_client_conn not set in CR global config; nothing to compare to metrics." \ + 1 \ + "Set max_client_conn in spec.proxy.pgBouncer.config.global or ignore this informational finding." + continue + fi + + # PromQL: max metric in namespace; optional extra labels from PROMETHEUS_EXTRA_LABELS e.g. pod=~\".*cluster-.* + extra="${PROMETHEUS_EXTRA_LABELS:-}" + if [ -n "$extra" ]; then + promql="max(pgbouncer_config_max_client_connections{namespace=\"${NAMESPACE}\",${extra}})" + else + promql="max(pgbouncer_config_max_client_connections{namespace=\"${NAMESPACE}\"})" + fi + + resp="$(curl -sS -G "${PROM_URL}/api/v1/query" --data-urlencode "query=${promql}" 2>/dev/null || echo '{"status":"error"}')" + status="$(echo "$resp" | jq -r '.status // "error"')" + + if [ "$status" != "success" ]; then + append_issue \ + "Prometheus query failed for \`${cluster_name}\`" \ + "Could not evaluate: ${promql}. Response snippet: $(echo "$resp" | jq -c . 
2>/dev/null | head -c 400)" \ + 2 \ + "Verify PROMETHEUS_URL, network access, and that pgbouncer_exporter metrics exist for this namespace." + continue + fi + + metric_val="$(echo "$resp" | jq -r '([.data.result[]?.value[1]? | tonumber] | max) // empty' 2>/dev/null || true)" + + if [ -z "$metric_val" ] || [ "$metric_val" = "null" ]; then + append_issue \ + "No Prometheus samples for pgbouncer_config_max_client_connections (namespace \`${NAMESPACE}\`)" \ + "Query returned empty series for cluster \`${cluster_name}\`." \ + 2 \ + "Confirm ServiceMonitor/PodMonitor scrapes PgBouncer metrics; adjust PROMETHEUS_EXTRA_LABELS if needed." + continue + fi + + m_int="$(printf '%.0f' "$metric_val" 2>/dev/null || echo "$metric_val")" + if [ "$m_int" != "$cr_max" ]; then + append_issue \ + "CR vs metrics drift for max_client_conn on \`${cluster_name}\`" \ + "CR max_client_conn=${cr_max}; Prometheus pgbouncer_config_max_client_connections=${metric_val} (instant max in namespace)." \ + 3 \ + "Reconcile GitOps/CR with running ConfigMap or exporter; ensure single source of truth for pool limits." 
+ fi +done < <(list_postgrescluster_names) + +echo "$issues_json" > "$OUTPUT_FILE" +echo "Cross-check wrote ${OUTPUT_FILE}" diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/fetch-postgrescluster-pgbouncer.sh b/codebundles/k8s-postgrescluster-pgbouncer-spec/fetch-postgrescluster-pgbouncer.sh new file mode 100755 index 00000000..5e497fb1 --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/fetch-postgrescluster-pgbouncer.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# ----------------------------------------------------------------------------- +# REQUIRED ENV: CONTEXT, NAMESPACE, POSTGRESCLUSTER_NAME (or All), KUBECONFIG +# OUTPUT: fetch_pgbouncer_issues.json (array), human summary on stdout +# ----------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=lib-pgbouncer-spec.sh +source "${SCRIPT_DIR}/lib-pgbouncer-spec.sh" + +: "${CONTEXT:?Must set CONTEXT}" +: "${NAMESPACE:?Must set NAMESPACE}" +: "${POSTGRESCLUSTER_NAME:?Must set POSTGRESCLUSTER_NAME}" + +OUTPUT_FILE="fetch_pgbouncer_issues.json" +issues_json='[]' +mode_lc="$(echo "${POSTGRESCLUSTER_NAME}" | tr '[:upper:]' '[:lower:]')" + +append_issue() { + local title="$1" details="$2" severity="$3" next_steps="$4" + issues_json=$(echo "$issues_json" | jq \ + --arg title "$title" \ + --arg details "$details" \ + --argjson severity "$severity" \ + --arg next_steps "$next_steps" \ + '. += [{ + "title": $title, + "details": $details, + "severity": $severity, + "next_steps": $next_steps + }]') +} + +while IFS= read -r cluster_name; do + [ -z "$cluster_name" ] && continue + echo "=== PostgresCluster: ${cluster_name} (namespace ${NAMESPACE}) ===" + + if ! 
raw_json="$(fetch_cluster_json "$cluster_name")" || [ -z "$raw_json" ]; then + append_issue \ + "Cannot read PostgresCluster \`${cluster_name}\`" \ + "kubectl get ${PG_CRD} failed or returned empty in namespace ${NAMESPACE}." \ + 4 \ + "Verify RBAC (get on postgresclusters), context ${CONTEXT}, and resource name." + echo "---" + continue + fi + + has_proxy="$(echo "$raw_json" | jq 'if (.spec.proxy.pgBouncer != null) then true else false end')" + if [ "$has_proxy" != "true" ]; then + append_issue \ + "PgBouncer proxy not defined for \`${cluster_name}\`" \ + "spec.proxy.pgBouncer is absent; this bundle audits PgBouncer settings only." \ + 2 \ + "Enable spec.proxy.pgBouncer in PostgresCluster or remove this SLX if pooling is not used." + echo "---" + echo "PgBouncer block: not present in spec" + echo "" + continue + fi + + global_json="$(echo "$raw_json" | jq -c '.spec.proxy.pgBouncer.config.global // {}' 2>/dev/null || echo '{}')" + replicas="$(echo "$raw_json" | jq -r '.spec.proxy.pgBouncer.replicas // empty' 2>/dev/null || true)" + pool="$(global_setting_alt "$raw_json" "pool_mode")" + dps="$(global_setting_alt "$raw_json" "default_pool_size")" + mcc="$(global_setting_alt "$raw_json" "max_client_conn")" + mdb="$(global_setting_alt "$raw_json" "max_db_connections")" + + echo "spec.proxy.pgBouncer.replicas: ${replicas:-}" + echo "global.pool_mode: ${pool:-}" + echo "global.default_pool_size: ${dps:-}" + echo "global.max_client_conn: ${mcc:-}" + echo "global.max_db_connections: ${mdb:-}" + echo "global (raw keys): $(echo "$global_json" | jq -r 'keys | join(", ")' 2>/dev/null || echo "{}")" + echo "" +done < <(list_postgrescluster_names) + +if [ "$mode_lc" = "all" ]; then + cnt="$(${KUBECTL} get "$PG_CRD" -n "${NAMESPACE}" --context "${CONTEXT}" --no-headers 2>/dev/null | wc -l | tr -d ' ')" + if [ "${cnt:-0}" -eq 0 ] 2>/dev/null; then + append_issue \ + "No PostgresCluster resources in namespace \`${NAMESPACE}\`" \ + "Discovery (POSTGRESCLUSTER_NAME=All) found zero 
${PG_CRD} objects." \ + 1 \ + "Create a PostgresCluster or scope POSTGRESCLUSTER_NAME to a specific cluster." + fi +fi + +echo "$issues_json" > "$OUTPUT_FILE" +echo "Fetch completed. Issues JSON: ${OUTPUT_FILE}" diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/lib-pgbouncer-spec.sh b/codebundles/k8s-postgrescluster-pgbouncer-spec/lib-pgbouncer-spec.sh new file mode 100755 index 00000000..20b5b148 --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/lib-pgbouncer-spec.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Shared helpers for PostgresCluster PgBouncer spec validation (sourced by task scripts). + +KUBECTL="${KUBERNETES_DISTRIBUTION_BINARY:-kubectl}" +PG_CRD="postgresclusters.postgres-operator.crunchydata.com" + +list_postgrescluster_names() { + local mode + mode="$(echo "${POSTGRESCLUSTER_NAME:-}" | tr '[:upper:]' '[:lower:]')" + if [ "$mode" = "all" ]; then + ${KUBECTL} get "$PG_CRD" -n "${NAMESPACE:?}" --context "${CONTEXT:?}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true + else + echo "${POSTGRESCLUSTER_NAME:?}" + fi +} + +fetch_cluster_json() { + local name="$1" + ${KUBECTL} get "$PG_CRD" "$name" -n "${NAMESPACE}" --context "${CONTEXT}" -o json 2>/dev/null +} + +global_setting() { + # Args: cluster_json key (e.g. pool_mode) + local json="$1" + local key="$2" + echo "$json" | jq -r --arg k "$key" ' + .spec.proxy.pgBouncer.config.global // {} | + if has($k) then .[$k] else empty end + ' 2>/dev/null | head -1 +} + +global_setting_alt() { + # Some clusters use camelCase in YAML that serializes differently; try common aliases. 
+ local json="$1" + local key="$2" + local v + v="$(global_setting "$json" "$key")" + if [ -n "$v" ] && [ "$v" != "null" ]; then + echo "$v" + return + fi + case "$key" in + pool_mode) global_setting "$json" "poolMode" ;; + default_pool_size) global_setting "$json" "defaultPoolSize" ;; + max_client_conn) global_setting "$json" "maxClientConn" ;; + max_db_connections) global_setting "$json" "maxDbConnections" ;; + *) echo "" ;; + esac +} + +numeric_or_empty() { + local s="$1" + if [ -z "$s" ] || [ "$s" = "null" ]; then + echo "" + return + fi + if [[ "$s" =~ ^[0-9]+$ ]]; then + echo "$s" + else + echo "" + fi +} diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/runbook.robot b/codebundles/k8s-postgrescluster-pgbouncer-spec/runbook.robot new file mode 100644 index 00000000..38e23bcb --- /dev/null +++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/runbook.robot @@ -0,0 +1,317 @@ +*** Settings *** +Documentation Audits Crunchy Postgres Operator PostgresCluster specs for PgBouncer proxy settings (pool mode, connection limits, replicas) and optionally compares declared limits to Prometheus metrics. +Metadata Author rw-codebundle-agent +Metadata Display Name Kubernetes PostgresCluster PgBouncer Spec Audit +Metadata Supports Kubernetes PostgresCluster PgBouncer CrunchyData +Force Tags Kubernetes PostgresCluster PgBouncer CrunchyData + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform +Library RW.K8sHelper + +Suite Setup Suite Initialization + + +*** Tasks *** +Fetch PostgresCluster PgBouncer Configuration for `${POSTGRESCLUSTER_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Reads the PostgresCluster CR and prints spec.proxy.pgBouncer global settings; flags missing proxy blocks or RBAC failures. + [Tags] kubernetes postgres pgbouncer config access:read-only data:config + + ${result}= RW.CLI.Run Bash File + ... bash_file=fetch-postgrescluster-pgbouncer.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... 
timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./fetch-postgrescluster-pgbouncer.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat fetch_pgbouncer_issues.json + ... env=${env} + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${n}= Get Length ${issue_list} + IF ${n} > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue["severity"]} + ... expected=PostgresCluster and PgBouncer spec should be readable and declared when pooling is required + ... actual=${issue["title"]} + ... title=${issue["title"]} + ... reproduce_hint=${result.cmd} + ... details=${issue["details"]} + ... next_steps=${issue["next_steps"]} + END + END + + RW.Core.Add Pre To Report Fetch PostgresCluster PgBouncer configuration (stdout from script): + RW.Core.Add Pre To Report ${result.stdout} + +Validate Pool Mode Matches Expected for `${POSTGRESCLUSTER_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Compares spec.proxy.pgBouncer.config.global pool_mode to EXPECTED_POOL_MODE for ORM-appropriate pooling. + [Tags] kubernetes postgres pgbouncer pool access:read-only data:config + + ${result}= RW.CLI.Run Bash File + ... bash_file=validate-pool-mode.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./validate-pool-mode.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat pool_mode_issues.json + ... env=${env} + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${n}= Get Length ${issue_list} + IF ${n} > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... 
severity=${issue["severity"]} + ... expected=pool_mode should match policy EXPECTED_POOL_MODE when PgBouncer is enabled + ... actual=${issue["title"]} + ... title=${issue["title"]} + ... reproduce_hint=${result.cmd} + ... details=${issue["details"]} + ... next_steps=${issue["next_steps"]} + END + END + + RW.Core.Add Pre To Report Pool mode validation output: + RW.Core.Add Pre To Report ${result.stdout} + +Validate Connection Limit Consistency for `${POSTGRESCLUSTER_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Checks default_pool_size against max_client_conn and max_db_connections for impossible or risky combinations. + [Tags] kubernetes postgres pgbouncer connections access:read-only data:config + + ${result}= RW.CLI.Run Bash File + ... bash_file=validate-connection-limits.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./validate-connection-limits.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat connection_limits_issues.json + ... env=${env} + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${n}= Get Length ${issue_list} + IF ${n} > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue["severity"]} + ... expected=PgBouncer global limits should be internally consistent + ... actual=${issue["title"]} + ... title=${issue["title"]} + ... reproduce_hint=${result.cmd} + ... details=${issue["details"]} + ... 
next_steps=${issue["next_steps"]} + END + END + + RW.Core.Add Pre To Report Connection limit validation output: + RW.Core.Add Pre To Report ${result.stdout} + +Check PgBouncer Replica Count vs Policy for `${POSTGRESCLUSTER_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Compares desired and ready PgBouncer replicas to MIN_PGBOUNCER_REPLICAS for HA expectations. + [Tags] kubernetes postgres pgbouncer replicas ha access:read-only data:config + + ${result}= RW.CLI.Run Bash File + ... bash_file=validate-pgbouncer-replicas.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./validate-pgbouncer-replicas.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat pgbouncer_replicas_issues.json + ... env=${env} + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${n}= Get Length ${issue_list} + IF ${n} > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue["severity"]} + ... expected=PgBouncer replicas should meet MIN_PGBOUNCER_REPLICAS when policy requires HA + ... actual=${issue["title"]} + ... title=${issue["title"]} + ... reproduce_hint=${result.cmd} + ... details=${issue["details"]} + ... next_steps=${issue["next_steps"]} + END + END + + RW.Core.Add Pre To Report PgBouncer replica validation output: + RW.Core.Add Pre To Report ${result.stdout} + +Optional Cross-Check CRD Limits with Live Prometheus Samples for `${POSTGRESCLUSTER_NAME}` in Namespace `${NAMESPACE}` + [Documentation] When PROMETHEUS_URL is set, compares CR max_client_conn to recent pgbouncer_config_max_client_connections samples. + [Tags] kubernetes postgres pgbouncer prometheus access:read-only data:metrics + + ${result}= RW.CLI.Run Bash File + ... bash_file=cross-check-crd-vs-metrics.sh + ... 
env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./cross-check-crd-vs-metrics.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat cross_check_issues.json + ... env=${env} + ... include_in_history=false + + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + + ${n}= Get Length ${issue_list} + IF ${n} > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue["severity"]} + ... expected=Declared max_client_conn should match live exporter metrics when Prometheus is available + ... actual=${issue["title"]} + ... title=${issue["title"]} + ... reproduce_hint=${result.cmd} + ... details=${issue["details"]} + ... next_steps=${issue["next_steps"]} + END + END + + RW.Core.Add Pre To Report Prometheus cross-check output: + RW.Core.Add Pre To Report ${result.stdout} + + +*** Keywords *** +Suite Initialization + ${kubeconfig}= RW.Core.Import Secret + ... kubeconfig + ... type=string + ... description=Kubernetes credentials with get/list on PostgresCluster and workloads + ... pattern=\w* + ... example=kubeconfig YAML + + ${CONTEXT}= RW.Core.Import User Variable CONTEXT + ... type=string + ... description=Kubernetes context name + ... pattern=\w* + ... example=my-cluster + + ${NAMESPACE}= RW.Core.Import User Variable NAMESPACE + ... type=string + ... description=Namespace containing the PostgresCluster + ... pattern=\w* + ... example=postgres-system + + ${POSTGRESCLUSTER_NAME}= RW.Core.Import User Variable POSTGRESCLUSTER_NAME + ... type=string + ... description=PostgresCluster resource name or All to list all in namespace + ... pattern=.* + ... example=hippo + + ${EXPECTED_POOL_MODE}= RW.Core.Import User Variable EXPECTED_POOL_MODE + ... type=string + ... 
description=Expected pool_mode value (transaction, session, or statement) + ... pattern=\w* + ... example=transaction + + ${MIN_PGBOUNCER_REPLICAS}= RW.Core.Import User Variable MIN_PGBOUNCER_REPLICAS + ... type=string + ... description=Minimum acceptable PgBouncer replicas for policy + ... pattern=\w* + ... default=1 + ... example=2 + + ${PROMETHEUS_URL}= RW.Core.Import User Variable PROMETHEUS_URL + ... type=string + ... description=Optional Prometheus base URL for metric cross-check (leave empty to skip) + ... pattern=.* + ... default= + ... example=http://prometheus-k8s.monitoring.svc:9090 + + ${PROMETHEUS_EXTRA_LABELS}= RW.Core.Import User Variable PROMETHEUS_EXTRA_LABELS + ... type=string + ... description=Optional extra PromQL label selectors appended to the namespace match (e.g. pod=~\"mycluster.*\") + ... pattern=.* + ... default= + ... example=postgres_cluster=\"hippo\" + + ${KUBERNETES_DISTRIBUTION_BINARY}= RW.Core.Import User Variable KUBERNETES_DISTRIBUTION_BINARY + ... type=string + ... description=Kubernetes CLI binary + ... pattern=\w* + ... default=kubectl + ... example=kubectl + + Set Suite Variable ${kubeconfig} ${kubeconfig} + Set Suite Variable ${CONTEXT} ${CONTEXT} + Set Suite Variable ${NAMESPACE} ${NAMESPACE} + Set Suite Variable ${POSTGRESCLUSTER_NAME} ${POSTGRESCLUSTER_NAME} + Set Suite Variable ${EXPECTED_POOL_MODE} ${EXPECTED_POOL_MODE} + Set Suite Variable ${MIN_PGBOUNCER_REPLICAS} ${MIN_PGBOUNCER_REPLICAS} + Set Suite Variable ${PROMETHEUS_URL} ${PROMETHEUS_URL} + Set Suite Variable ${PROMETHEUS_EXTRA_LABELS} ${PROMETHEUS_EXTRA_LABELS} + Set Suite Variable ${KUBERNETES_DISTRIBUTION_BINARY} ${KUBERNETES_DISTRIBUTION_BINARY} + + ${env}= Create Dictionary + ... CONTEXT=${CONTEXT} + ... NAMESPACE=${NAMESPACE} + ... POSTGRESCLUSTER_NAME=${POSTGRESCLUSTER_NAME} + ... EXPECTED_POOL_MODE=${EXPECTED_POOL_MODE} + ... MIN_PGBOUNCER_REPLICAS=${MIN_PGBOUNCER_REPLICAS} + ... PROMETHEUS_URL=${PROMETHEUS_URL} + ... 
PROMETHEUS_EXTRA_LABELS=${PROMETHEUS_EXTRA_LABELS}
+    ...    KUBERNETES_DISTRIBUTION_BINARY=${KUBERNETES_DISTRIBUTION_BINARY}
+    ...    KUBECONFIG=./${kubeconfig.key}
+    Set Suite Variable    ${env}    ${env}
+
+    RW.K8sHelper.Verify Cluster Connectivity
+    ...    binary=${KUBERNETES_DISTRIBUTION_BINARY}
+    ...    context=${CONTEXT}
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-connection-limits.sh b/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-connection-limits.sh
new file mode 100755
index 00000000..f9280d45
--- /dev/null
+++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-connection-limits.sh
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x  # xtrace: every executed command is echoed to stderr
+
+# -----------------------------------------------------------------------------
+# Validates ordering of default_pool_size vs max_client_conn / max_db_connections.
+# REQUIRED: CONTEXT, NAMESPACE, POSTGRESCLUSTER_NAME   OUTPUT: connection_limits_issues.json
+# -----------------------------------------------------------------------------
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=lib-pgbouncer-spec.sh
+source "${SCRIPT_DIR}/lib-pgbouncer-spec.sh"  # helper functions used below are expected to come from this file
+
+: "${CONTEXT:?Must set CONTEXT}"  # each ':?' aborts the script with its message when the variable is unset or empty
+: "${NAMESPACE:?Must set NAMESPACE}"
+: "${POSTGRESCLUSTER_NAME:?Must set POSTGRESCLUSTER_NAME}"
+
+OUTPUT_FILE="connection_limits_issues.json"
+issues_json='[]'  # accumulator: JSON array that append_issue extends in place
+
+append_issue() {  # usage: append_issue TITLE DETAILS SEVERITY NEXT_STEPS (SEVERITY must be valid JSON: it is passed via --argjson)
+    local title="$1" details="$2" severity="$3" next_steps="$4"
+    issues_json=$(echo "$issues_json" | jq \
+        --arg title "$title" \
+        --arg details "$details" \
+        --argjson severity "$severity" \
+        --arg next_steps "$next_steps" \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "severity": $severity,
+          "next_steps": $next_steps
+        }]')
+}
+
+while IFS= read -r cluster_name; do  # one cluster name per input line
+    [ -z "$cluster_name" ] && continue  # ignore blank lines
+
+    if !
raw_json="$(fetch_cluster_json "$cluster_name")" || [ -z "$raw_json" ]; then
+        append_issue \
+            "Cannot read PostgresCluster for limits check: \`${cluster_name}\`" \
+            "kubectl get failed." \
+            4 \
+            "Verify kube access."
+        continue
+    fi
+
+    if [ "$(echo "$raw_json" | jq 'if (.spec.proxy.pgBouncer != null) then true else false end')" != "true" ]; then
+        continue  # no PgBouncer section in the spec: nothing to validate for this cluster
+    fi
+
+    dps="$(numeric_or_empty "$(global_setting_alt "$raw_json" "default_pool_size")")"
+    mcc="$(numeric_or_empty "$(global_setting_alt "$raw_json" "max_client_conn")")"
+    mdb="$(numeric_or_empty "$(global_setting_alt "$raw_json" "max_db_connections")")"
+
+    if [ -n "$mdb" ] && [ -n "$dps" ] && [ "$mdb" -gt 0 ] && [ "$mdb" -lt "$dps" ]; then  # max_db_connections=0 means "unlimited" in PgBouncer, so only a positive cap can be too small
+        append_issue \
+            "max_db_connections below default_pool_size for \`${cluster_name}\`" \
+            "max_db_connections=${mdb} default_pool_size=${dps}; backend limit cannot satisfy pool demand." \
+            3 \
+            "Increase max_db_connections or lower default_pool_size in PgBouncer global settings."
+    fi
+
+    if [ -n "$mcc" ] && [ -n "$dps" ] && [ "$mcc" -lt "$dps" ]; then  # both values must be present for the comparison to run
+        append_issue \
+            "max_client_conn lower than default_pool_size for \`${cluster_name}\`" \
+            "max_client_conn=${mcc} default_pool_size=${dps}; unusual/risky combination." \
+            2 \
+            "Review PgBouncer sizing: max_client_conn is usually much larger than per-database pool size."
+    fi
+
+    if [ -n "$mcc" ] && [ -n "$mdb" ] && [ -n "$dps" ] && [ "$mdb" -gt 0 ] 2>/dev/null; then
+        # Heuristic: if clients can open more slots than backend allows across pools
+        if [ "$mcc" -gt "$mdb" ] && [ "$dps" -gt $((mdb / 2)) ] 2>/dev/null; then
+            append_issue \
+                "Possible saturation risk for \`${cluster_name}\`" \
+                "max_client_conn=${mcc} max_db_connections=${mdb} default_pool_size=${dps}; clients may compete for limited backend connections." \
+                2 \
+                "Align limits with expected client concurrency; consider raising max_db_connections or tuning pool sizes."
+        fi
+    fi
+done < <(list_postgrescluster_names)
+
+echo "$issues_json" > "$OUTPUT_FILE"
+echo "Connection limits validation wrote ${OUTPUT_FILE}"
diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pgbouncer-replicas.sh b/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pgbouncer-replicas.sh
new file mode 100755
index 00000000..a7aca214
--- /dev/null
+++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pgbouncer-replicas.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x  # xtrace: every executed command is echoed to stderr
+
+# -----------------------------------------------------------------------------
+# REQUIRED: CONTEXT, NAMESPACE, POSTGRESCLUSTER_NAME   OPTIONAL: MIN_PGBOUNCER_REPLICAS (default 1)
+# OUTPUT: pgbouncer_replicas_issues.json
+# -----------------------------------------------------------------------------
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=lib-pgbouncer-spec.sh
+source "${SCRIPT_DIR}/lib-pgbouncer-spec.sh"  # helper functions used below are expected to come from this file
+
+: "${CONTEXT:?Must set CONTEXT}"  # each ':?' aborts the script with its message when the variable is unset or empty
+: "${NAMESPACE:?Must set NAMESPACE}"
+: "${POSTGRESCLUSTER_NAME:?Must set POSTGRESCLUSTER_NAME}"
+
+MIN_R="${MIN_PGBOUNCER_REPLICAS:-1}"  # policy threshold; 1 when the variable is unset or empty
+if ! [[ "$MIN_R" =~ ^[0-9]+$ ]]; then  # anything other than a plain non-negative integer ...
+    MIN_R=1  # ... is silently replaced by 1
+fi
+
+OUTPUT_FILE="pgbouncer_replicas_issues.json"
+issues_json='[]'  # accumulator: JSON array that append_issue extends in place
+
+append_issue() {  # usage: append_issue TITLE DETAILS SEVERITY NEXT_STEPS (SEVERITY must be valid JSON: it is passed via --argjson)
+    local title="$1" details="$2" severity="$3" next_steps="$4"
+    issues_json=$(echo "$issues_json" | jq \
+        --arg title "$title" \
+        --arg details "$details" \
+        --argjson severity "$severity" \
+        --arg next_steps "$next_steps" \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "severity": $severity,
+          "next_steps": $next_steps
+        }]')
+}
+
+while IFS= read -r cluster_name; do  # one cluster name per input line
+    [ -z "$cluster_name" ] && continue  # ignore blank lines
+
+    if ! raw_json="$(fetch_cluster_json "$cluster_name")" || [ -z "$raw_json" ]; then  # helper failed or printed nothing
+        append_issue \
+            "Cannot read PostgresCluster for replica check: \`${cluster_name}\`" \
+            "kubectl get failed." \
+            4 \
+            "Verify kube access."
+        continue
+    fi
+
+    if [ "$(echo "$raw_json" | jq 'if (.spec.proxy.pgBouncer != null) then true else false end')" != "true" ]; then
+        continue  # no PgBouncer section in the spec: nothing to validate for this cluster
+    fi
+
+    spec_rep="$(echo "$raw_json" | jq -r '.spec.proxy.pgBouncer.replicas // empty')"
+    ready_rep="$(echo "$raw_json" | jq -r '.status.proxy.pgBouncer.readyReplicas // 0')"  # an absent field is counted as 0 ready replicas, so "none ready" is still compared to the policy
+    stat_rep="$(echo "$raw_json" | jq -r '.status.proxy.pgBouncer.replicas // empty')"
+
+    echo "cluster=${cluster_name} spec.replicas=${spec_rep:-} status.replicas=${stat_rep:-?} status.readyReplicas=${ready_rep:-?}"
+
+    effective_spec="$spec_rep"
+    if [ -z "$effective_spec" ] || [ "$effective_spec" = "null" ]; then
+        effective_spec=1  # spec.replicas not given: evaluate the policy as if one replica were requested
+    fi
+
+    if [ "$effective_spec" -lt "$MIN_R" ] 2>/dev/null; then  # a non-integer value makes the test fail quietly and no issue is raised
+        append_issue \
+            "PgBouncer spec replicas below policy for \`${cluster_name}\`" \
+            "spec.proxy.pgBouncer.replicas=${spec_rep:-1} (effective ${effective_spec}); MIN_PGBOUNCER_REPLICAS=${MIN_R}." \
+            2 \
+            "Raise spec.proxy.pgBouncer.replicas to at least ${MIN_R} for HA, or lower MIN_PGBOUNCER_REPLICAS if single replica is acceptable."
+    fi
+
+    if [ -n "$ready_rep" ] && [ "$ready_rep" != "null" ] && [ "$ready_rep" -lt "$MIN_R" ] 2>/dev/null; then
+        append_issue \
+            "PgBouncer ready replicas below policy for \`${cluster_name}\`" \
+            "status.proxy.pgBouncer.readyReplicas=${ready_rep}; policy requires MIN_PGBOUNCER_REPLICAS=${MIN_R}." \
+            2 \
+            "Investigate PgBouncer pods (image pull, scheduling, rollout); restore readiness before production traffic."
+    fi
+done < <(list_postgrescluster_names)
+
+echo "$issues_json" > "$OUTPUT_FILE"
+echo "Replica validation wrote ${OUTPUT_FILE}"
diff --git a/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pool-mode.sh b/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pool-mode.sh
new file mode 100755
index 00000000..e4a6fe9f
--- /dev/null
+++ b/codebundles/k8s-postgrescluster-pgbouncer-spec/validate-pool-mode.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x  # xtrace: every executed command is echoed to stderr
+
+# -----------------------------------------------------------------------------
+# REQUIRED: CONTEXT, NAMESPACE, POSTGRESCLUSTER_NAME, EXPECTED_POOL_MODE
+# OUTPUT: pool_mode_issues.json
+# -----------------------------------------------------------------------------
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=lib-pgbouncer-spec.sh
+source "${SCRIPT_DIR}/lib-pgbouncer-spec.sh"  # helper functions used below are expected to come from this file
+
+: "${CONTEXT:?Must set CONTEXT}"  # each ':?' aborts the script with its message when the variable is unset or empty
+: "${NAMESPACE:?Must set NAMESPACE}"
+: "${POSTGRESCLUSTER_NAME:?Must set POSTGRESCLUSTER_NAME}"
+: "${EXPECTED_POOL_MODE:?Must set EXPECTED_POOL_MODE}"
+
+OUTPUT_FILE="pool_mode_issues.json"
+issues_json='[]'  # accumulator: JSON array that append_issue extends in place
+
+append_issue() {  # usage: append_issue TITLE DETAILS SEVERITY NEXT_STEPS (SEVERITY must be valid JSON: it is passed via --argjson)
+    local title="$1" details="$2" severity="$3" next_steps="$4"
+    issues_json=$(echo "$issues_json" | jq \
+        --arg title "$title" \
+        --arg details "$details" \
+        --argjson severity "$severity" \
+        --arg next_steps "$next_steps" \
+        '. += [{
+          "title": $title,
+          "details": $details,
+          "severity": $severity,
+          "next_steps": $next_steps
+        }]')
+}
+
+expected_norm="$(echo "${EXPECTED_POOL_MODE}" | tr '[:upper:]' '[:lower:]')"  # lower-cased so the later comparison ignores case
+
+while IFS= read -r cluster_name; do  # one cluster name per input line
+    [ -z "$cluster_name" ] && continue  # ignore blank lines
+
+    if ! raw_json="$(fetch_cluster_json "$cluster_name")" || [ -z "$raw_json" ]; then  # helper failed or printed nothing
+        append_issue \
+            "Cannot read PostgresCluster for pool mode check: \`${cluster_name}\`" \
+            "kubectl get failed." \
+            4 \
+            "Verify kube access and resource name."
+        continue
+    fi
+
+    if [ "$(echo "$raw_json" | jq 'if (.spec.proxy.pgBouncer != null) then true else false end')" != "true" ]; then  # no PgBouncer: the note below is informational (severity 4), not a policy violation
+        append_issue \
+            "Pool mode check skipped (no PgBouncer): \`${cluster_name}\`" \
+            "spec.proxy.pgBouncer is not configured." \
+            4 \
+            "Configure PgBouncer or set EXPECTED_POOL_MODE only when proxy is enabled."
+        continue
+    fi
+
+    pool_raw="$(global_setting_alt "$raw_json" "pool_mode")"
+    if [ -z "$pool_raw" ] || [ "$pool_raw" = "null" ]; then  # setting missing from the CR's global config
+        append_issue \
+            "pool_mode not set in PgBouncer global config for \`${cluster_name}\`" \
+            "Expected pool_mode (transaction|session|statement) in spec.proxy.pgBouncer.config.global." \
+            2 \
+            "Set global.pool_mode in PostgresCluster to match workload (often transaction for server-side pooling/ORMs)."
+        continue
+    fi
+
+    pool_norm="$(echo "$pool_raw" | tr '[:upper:]' '[:lower:]')"  # lower-cased to compare case-insensitively with expected_norm
+    if [ "$pool_norm" != "$expected_norm" ]; then
+        append_issue \
+            "pool_mode mismatch for \`${cluster_name}\`" \
+            "Found pool_mode=${pool_raw}; policy EXPECTED_POOL_MODE=${EXPECTED_POOL_MODE}." \
+            3 \
+            "Update spec.proxy.pgBouncer.config.global.pool_mode to ${EXPECTED_POOL_MODE} or adjust policy if intentional."
+    fi
+done < <(list_postgrescluster_names)
+
+echo "$issues_json" > "$OUTPUT_FILE"  # written even when no issue was found, so the file always holds a JSON array
+echo "Pool mode validation wrote ${OUTPUT_FILE}"