Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generation rules for the PgBouncer spec audit: each matching PostgresCluster
# resource yields the output items listed under `outputItems`.
apiVersion: runwhen.com/v1
kind: GenerationRules
spec:
  generationRules:
    - resourceTypes:
        - postgresclusters.postgres-operator.crunchydata.com
      matchRules:
        # The pattern ".+" is evaluated against the `name` property, so any
        # non-empty resource name matches.
        - type: pattern
          pattern: '.+'
          properties:
            - name
          mode: substring
      slxs:
        - baseName: k8s-pgbo-spec
          shortenedBaseName: k8s-pgbo-spec
          qualifiers:
            - resource
            - namespace
            - cluster
          baseTemplateName: k8s-postgrescluster-pgbouncer-spec
          levelOfDetail: detailed
          outputItems:
            - type: slx
            - type: runbook
              templateName: k8s-postgrescluster-pgbouncer-spec-taskset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SLX template for the PgBouncer spec audit. Rendered by Jinja first, then
# parsed as YAML; the include lines are indented to the depth at which the
# included fragments are expected to land.
apiVersion: runwhen.com/v1
kind: ServiceLevelX
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/CrunchyDataPrimaryIcon.png
  alias: {{match_resource.resource.metadata.name}} PgBouncer Spec Audit
  asMeasuredBy: Declared PgBouncer pool settings and replica counts match policy.
  configProvided:
    # Name of the matched PostgresCluster resource.
    - name: POSTGRESCLUSTER_NAME
      value: "{{match_resource.resource.metadata.name}}"
  owners:
    - {{workspace.owner_email}}
  statement: PgBouncer proxy settings on the PostgresCluster should match organizational pooling and HA policies.
  additionalContext:
    {% include "kubernetes-hierarchy.yaml" ignore missing %}
    qualified_name: "{{ match_resource.qualified_name }}"
  tags:
    {% include "kubernetes-tags.yaml" ignore missing %}
    - name: access
      value: read-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Runbook template for the PgBouncer spec audit. Rendered by Jinja first, then
# parsed as YAML.
apiVersion: runwhen.com/v1
kind: Runbook
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  location: {{default_location}}
  description: Audits PostgresCluster PgBouncer spec (pool mode, limits, replicas) and optional Prometheus cross-check.
  codeBundle:
    # Use the repository and ref supplied by the renderer when present;
    # otherwise fall back to the public code collection on `main`.
    {% if repo_url %}
    repoUrl: {{repo_url}}
    {% else %}
    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
    {% endif %}
    {% if ref %}
    ref: {{ref}}
    {% else %}
    ref: main
    {% endif %}
    pathToRobot: codebundles/k8s-postgrescluster-pgbouncer-spec/runbook.robot
  configProvided:
    # Values taken from the matched resource and the discovery context.
    - name: NAMESPACE
      value: '{{match_resource.resource.metadata.namespace}}'
    - name: CONTEXT
      value: '{{context}}'
    - name: POSTGRESCLUSTER_NAME
      value: '{{match_resource.resource.metadata.name}}'
    # Values overridable through the workspace `custom` block; each falls back
    # to the default given in its filter.
    - name: EXPECTED_POOL_MODE
      value: "{{custom.expected_pool_mode | default('transaction')}}"
    - name: MIN_PGBOUNCER_REPLICAS
      value: "{{custom.min_pgbouncer_replicas | default('1')}}"
    - name: PROMETHEUS_URL
      value: "{{custom.prometheus_url | default('')}}"
    - name: PROMETHEUS_EXTRA_LABELS
      value: "{{custom.prometheus_extra_labels | default('')}}"
    - name: KUBERNETES_DISTRIBUTION_BINARY
      value: "{{custom.kubernetes_distribution_binary | default('kubectl')}}"
  secretsProvided:
  {% if wb_version %}
    {% include "kubernetes-auth.yaml" ignore missing %}
  {% else %}
    - name: kubeconfig
      workspaceKey: {{custom.kubeconfig_secret_name | default("kubeconfig")}}
  {% endif %}
105 changes: 105 additions & 0 deletions codebundles/k8s-postgrescluster-pgbouncer-spec/.test/Taskfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
version: "3"

# Test harness for the k8s-postgrescluster-pgbouncer-spec CodeBundle: builds
# the Kubernetes fixtures, writes a RunWhen Local workspaceInfo.yaml, and runs
# discovery in a local container.
tasks:
  default:
    desc: "Run/refresh config"
    cmds:
      - task: check-unpushed-commits
      - task: generate-rwl-config
      - task: run-rwl-discovery

  clean:
    desc: "Run cleanup tasks"
    cmds:
      - task: remove-kubernetes-objects
      - task: clean-rwl-discovery

  build-infra:
    desc: "Build test infrastructure"
    cmds:
      - task: create-kubernetes-objects

  create-kubernetes-objects:
    desc: "Apply manifests from kubernetes directory using kubectl"
    cmds:
      # Pass the directory rather than a shell glob: with `-f kubernetes/*`
      # only the first expanded path is bound to -f and any further files
      # become stray positional arguments.
      - kubectl apply -f kubernetes/
    silent: true

  remove-kubernetes-objects:
    desc: "Delete kubernetes objects"
    cmds:
      # Directory form for the same reason as create-kubernetes-objects.
      - kubectl delete -f kubernetes/ --ignore-not-found=true
    silent: true

  check-unpushed-commits:
    desc: Check for uncommitted/unpushed changes
    vars:
      # CodeBundle root, relative to this Taskfile's directory.
      BASE_DIR: "../"
    cmds:
      # BASE_DIR is a Task template variable, not a shell variable, so it is
      # injected with {{.BASE_DIR}}. It is used as a git pathspec because
      # `git diff --name-only` prints repo-root-relative paths, which a
      # "^../" prefix filter can never match.
      - |
        UNCOMMITTED=$(git diff --name-only HEAD -- "{{.BASE_DIR}}" | grep -v "/\.test/" || true)
        if [ -n "$UNCOMMITTED" ]; then
          echo "Uncommitted changes found. Commit and push before testing."
          exit 1
        fi
    silent: true

  generate-rwl-config:
    desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)"
    env:
      RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}'
    cmds:
      # Derives the repo URL, branch, CodeBundle name (parent directory of
      # .test) and test namespace, then writes workspaceInfo.yaml.
      - |
        repo_url=$(git config --get remote.origin.url)
        branch_name=$(git rev-parse --abbrev-ref HEAD)
        codebundle=$(basename "$(dirname "$PWD")")
        namespace=$(yq e 'select(.kind == "Namespace") | .metadata.name' kubernetes/manifest.yaml -N)
        cat <<EOF > workspaceInfo.yaml
        workspaceName: "$RW_WORKSPACE"
        workspaceOwnerEmail: authors@runwhen.com
        defaultLocation: location-01
        defaultLOD: none
        cloudConfig:
          kubernetes:
            kubeconfigFile: /shared/kubeconfig
            namespaceLODs:
              $namespace: detailed
            namespaces:
              - $namespace
        codeCollections:
        - repoURL: "$repo_url"
          branch: "$branch_name"
          codeBundles: ["$codebundle"]
        custom:
          kubeconfig_secret_name: "kubeconfig"
          kubernetes_distribution_binary: kubectl
          expected_pool_mode: "transaction"
          min_pgbouncer_replicas: "1"
        EOF
    silent: true

  run-rwl-discovery:
    desc: "Run RunWhen Local Discovery on test infrastructure"
    cmds:
      # Removes any previous RunWhenLocal container, resets ./output, then
      # starts a fresh container and runs discovery inside it.
      # NOTE(review): the kubeconfig path is read from the RW_FROM_FILE JSON
      # environment variable, and `$1` is forwarded to run.sh as-is; confirm
      # both are supplied by the calling environment.
      - |
        CONTAINER_NAME="RunWhenLocal"
        if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then
          docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME
        elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then
          docker rm $CONTAINER_NAME
        fi
        sudo rm -rf output || true
        mkdir -p output && chmod 777 output || true
        kubeconfig=$(echo "$RW_FROM_FILE" | jq -r .kubeconfig)
        docker run --name $CONTAINER_NAME -p 8081:8081 \
          -v "$(pwd)":/shared \
          -v "$kubeconfig":/shared/kubeconfig \
          -d ghcr.io/runwhen-contrib/runwhen-local:latest || { echo "Failed to start container"; exit 1; }
        docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { echo "Discovery failed"; exit 1; }
    silent: true

  clean-rwl-discovery:
    desc: "Remove discovery output"
    cmds:
      - rm -rf output workspaceInfo.yaml
    silent: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Namespace resource named test-pgo-pgbouncer-spec.
kind: Namespace
apiVersion: v1
metadata:
  name: "test-pgo-pgbouncer-spec"
55 changes: 55 additions & 0 deletions codebundles/k8s-postgrescluster-pgbouncer-spec/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Kubernetes PostgresCluster PgBouncer Spec Audit

This CodeBundle validates Crunchy Postgres Operator (PGO) `PostgresCluster` custom resources for the PgBouncer proxy: `spec.proxy.pgBouncer.config.global` keys such as `pool_mode`, `default_pool_size`, `max_client_conn`, and `max_db_connections`, plus replica expectations. It complements runtime Prometheus health checks by auditing declared GitOps configuration.

## Overview

- **Fetch**: Reads each targeted `PostgresCluster` and summarizes PgBouncer global settings; reports when the proxy block is missing or the CR cannot be read.
- **Pool mode**: Compares `pool_mode` to `EXPECTED_POOL_MODE` (transaction, session, or statement).
- **Connection limits**: Flags inconsistent numeric combinations (for example `max_db_connections` below `default_pool_size`).
- **Replicas**: Compares desired and ready PgBouncer replicas to `MIN_PGBOUNCER_REPLICAS`.
- **Prometheus (optional)**: When `PROMETHEUS_URL` is set, compares CR `max_client_conn` to `pgbouncer_config_max_client_connections` samples.

PGO stores PgBouncer options under `spec.proxy.pgBouncer.config.global` as `pgbouncer.ini`-style keys (underscore names). Confirm field paths with `kubectl explain postgrescluster.spec.proxy.pgBouncer` on your operator version.

## Configuration

### Required Variables

- `CONTEXT`: Kubernetes context name.
- `NAMESPACE`: Namespace containing the `PostgresCluster`.
- `POSTGRESCLUSTER_NAME`: Name of the `PostgresCluster` CR, or `All` to evaluate every `PostgresCluster` in the namespace.
- `EXPECTED_POOL_MODE`: Expected `pool_mode` string (`transaction`, `session`, or `statement`).

### Optional Variables

- `MIN_PGBOUNCER_REPLICAS`: Minimum acceptable PgBouncer replicas for policy (default: `1`).
- `PROMETHEUS_URL`: Base URL for Prometheus (for example `http://prometheus:9090`). Leave empty, or set to `disabled`, to skip the cross-check task.
- `PROMETHEUS_EXTRA_LABELS`: Extra PromQL label selectors appended inside the `pgbouncer_config_max_client_connections` selector (for example `pod=~"hippo.*"`). Optional.
- `KUBERNETES_DISTRIBUTION_BINARY`: CLI binary (default: `kubectl`).

### Secrets

- `kubeconfig`: Kubernetes credentials with `get`/`list` on `postgresclusters.postgres-operator.crunchydata.com` and related workloads. Format: kubeconfig YAML.

## Tasks Overview

### Fetch PostgresCluster PgBouncer Configuration

Loads the CR and prints PgBouncer global settings. Raises issues when the cluster cannot be read, when `POSTGRESCLUSTER_NAME=All` finds no clusters, or when `spec.proxy.pgBouncer` is absent.

### Validate Pool Mode Matches Expected

Compares configured `pool_mode` to `EXPECTED_POOL_MODE` for ORM-appropriate pooling.

### Validate Connection Limit Consistency

Checks relationships between `default_pool_size`, `max_client_conn`, and `max_db_connections` and flags impossible or risky combinations.

### Check PgBouncer Replica Count vs Policy

Compares `spec.proxy.pgBouncer.replicas` and `status.proxy.pgBouncer.readyReplicas` to `MIN_PGBOUNCER_REPLICAS`.

### Optional Cross-Check CRD Limits with Live Prometheus Samples

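When `PROMETHEUS_URL` is set, runs an instant query for the maximum `pgbouncer_config_max_client_connections` value in the namespace (optionally narrowed by `PROMETHEUS_EXTRA_LABELS`) and compares it to the CR `max_client_conn`. Because the query takes the maximum across the namespace, the comparison can be misleading when several clusters with different limits share a namespace.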
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -euo pipefail
# Trace every executed command to stderr.
set -x

# -----------------------------------------------------------------------------
# Optional: PROMETHEUS_URL — compares CR max_client_conn to Prometheus sample.
# OUTPUT: cross_check_issues.json
# -----------------------------------------------------------------------------

# Directory containing this script, used to locate the shared helper library
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=lib-pgbouncer-spec.sh
source "${SCRIPT_DIR}/lib-pgbouncer-spec.sh"

# Required inputs; the script exits with the given message if any is unset or
# empty.
# NOTE(review): CONTEXT is not referenced directly in this script; presumably
# it is consumed by the sourced library helpers — confirm.
: "${CONTEXT:?Must set CONTEXT}"
: "${NAMESPACE:?Must set NAMESPACE}"
: "${POSTGRESCLUSTER_NAME:?Must set POSTGRESCLUSTER_NAME}"

# Issues are accumulated in issues_json as a JSON array and written to
# OUTPUT_FILE (relative to the current working directory).
OUTPUT_FILE="cross_check_issues.json"
issues_json='[]'

# Append one issue object to the global issues_json array.
# Arguments:
#   $1  title       issue title (string)
#   $2  details     issue details (string)
#   $3  severity    passed with --argjson, so it must be valid JSON (the
#                   callers in this script pass an integer)
#   $4  next_steps  suggested remediation (string)
append_issue() {
  local issue_title="$1"
  local issue_details="$2"
  local issue_severity="$3"
  local issue_next_steps="$4"

  issues_json=$(
    jq \
      --arg title "$issue_title" \
      --arg details "$issue_details" \
      --argjson severity "$issue_severity" \
      --arg next_steps "$issue_next_steps" \
      '. + [{
        "title": $title,
        "details": $details,
        "severity": $severity,
        "next_steps": $next_steps
      }]' <<<"$issues_json"
  )
}

# -----------------------------------------------------------------------------
# Main flow: for every cluster name returned by list_postgrescluster_names,
# compare the CR's max_client_conn with the maximum
# pgbouncer_config_max_client_connections sample Prometheus reports for the
# namespace, and record any findings in issues_json.
# -----------------------------------------------------------------------------
PROM_URL="${PROMETHEUS_URL:-}"

# An empty URL or the literal "disabled" turns the cross-check off. An empty
# issue list is still written so the output file exists.
if [ -z "$PROM_URL" ] || [ "$PROM_URL" = "disabled" ]; then
  echo '[]' > "$OUTPUT_FILE"
  echo "Cross-check skipped (PROMETHEUS_URL not set)."
  exit 0
fi

# Trim trailing slash
PROM_URL="${PROM_URL%/}"

# Upper bound, in seconds, for a single Prometheus request so an unreachable
# endpoint cannot hang the task indefinitely.
prom_timeout_seconds=30

while IFS= read -r cluster_name; do
  [ -z "$cluster_name" ] && continue

  if ! raw_json="$(fetch_cluster_json "$cluster_name")" || [ -z "$raw_json" ]; then
    append_issue \
      "Cross-check skipped (no CR) for \`${cluster_name}\`" \
      "Could not load PostgresCluster JSON." \
      2 \
      "Fix kubectl access before relying on Prometheus cross-check."
    continue
  fi

  # Clusters without a PgBouncer proxy block have nothing to cross-check.
  if ! echo "$raw_json" | jq -e '.spec.proxy.pgBouncer != null' >/dev/null; then
    continue
  fi

  cr_max="$(numeric_or_empty "$(global_setting_alt "$raw_json" "max_client_conn")")"
  if [ -z "$cr_max" ]; then
    # The finding describes itself as informational, so it uses the least
    # severe level (4) rather than the most severe (1).
    append_issue \
      "Cross-check skipped for \`${cluster_name}\`" \
      "max_client_conn not set in CR global config; nothing to compare to metrics." \
      4 \
      "Set max_client_conn in spec.proxy.pgBouncer.config.global or ignore this informational finding."
    continue
  fi

  # PromQL: maximum of the metric across the namespace. PROMETHEUS_EXTRA_LABELS
  # may add label matchers inside the selector, e.g. pod=~".*cluster-.*"
  extra="${PROMETHEUS_EXTRA_LABELS:-}"
  if [ -n "$extra" ]; then
    promql="max(pgbouncer_config_max_client_connections{namespace=\"${NAMESPACE}\",${extra}})"
  else
    promql="max(pgbouncer_config_max_client_connections{namespace=\"${NAMESPACE}\"})"
  fi

  resp="$(curl -sS -G --max-time "$prom_timeout_seconds" "${PROM_URL}/api/v1/query" --data-urlencode "query=${promql}" 2>/dev/null || echo '{"status":"error"}')"
  # A non-JSON body (for example an HTML error page) makes jq fail; treat that
  # as an error status instead of letting `set -e` abort the script before the
  # output file is written.
  status="$(echo "$resp" | jq -r '.status // "error"' 2>/dev/null || echo "error")"

  if [ "$status" != "success" ]; then
    # Prefer compact JSON for the snippet; fall back to the raw body when the
    # response is not JSON so the issue still shows what came back.
    snippet="$(echo "$resp" | jq -c . 2>/dev/null | head -c 400 || true)"
    if [ -z "$snippet" ]; then
      snippet="$(printf '%s' "$resp" | head -c 400 || true)"
    fi
    append_issue \
      "Prometheus query failed for \`${cluster_name}\`" \
      "Could not evaluate: ${promql}. Response snippet: ${snippet}" \
      2 \
      "Verify PROMETHEUS_URL, network access, and that pgbouncer_exporter metrics exist for this namespace."
    continue
  fi

  # Largest sample value across all returned series; empty when none exist.
  metric_val="$(echo "$resp" | jq -r '([.data.result[]?.value[1]? | tonumber] | max) // empty' 2>/dev/null || true)"

  if [ -z "$metric_val" ] || [ "$metric_val" = "null" ]; then
    append_issue \
      "No Prometheus samples for pgbouncer_config_max_client_connections (namespace \`${NAMESPACE}\`)" \
      "Query returned empty series for cluster \`${cluster_name}\`." \
      2 \
      "Confirm ServiceMonitor/PodMonitor scrapes PgBouncer metrics; adjust PROMETHEUS_EXTRA_LABELS if needed."
    continue
  fi

  # Round the sample to an integer string so it can be compared with the CR
  # value; keep the raw value if printf cannot format it.
  m_int="$(printf '%.0f' "$metric_val" 2>/dev/null || echo "$metric_val")"
  if [ "$m_int" != "$cr_max" ]; then
    append_issue \
      "CR vs metrics drift for max_client_conn on \`${cluster_name}\`" \
      "CR max_client_conn=${cr_max}; Prometheus pgbouncer_config_max_client_connections=${metric_val} (instant max in namespace)." \
      3 \
      "Reconcile GitOps/CR with running ConfigMap or exporter; ensure single source of truth for pool limits."
  fi
done < <(list_postgrescluster_names)

echo "$issues_json" > "$OUTPUT_FILE"
echo "Cross-check wrote ${OUTPUT_FILE}"
Loading