runwhen-contrib · rw-codebundle-agent · Apr 21, 2026
@@ -0,0 +1,22 @@
+# Generation rules for the VictoriaMetrics health check CodeBundle.
+# Declares one SLX definition (slx, sli and runbook output items) for
+# resources of type namespace.
+apiVersion: runwhen.com/v1
+kind: GenerationRules
+spec:
+  generationRules:
+    - resourceTypes: [namespace]
+      matchRules:
+        # ".+" matches any non-empty value of the name property.
+        # NOTE(review): this appears to select every discovered namespace,
+        # whether or not VictoriaMetrics runs in it — confirm that is intended.
+        - type: pattern
+          mode: substring
+          properties:
+            - name
+          pattern: '.+'
+      slxs:
+        - baseName: k8s-vm-hc
+          shortenedBaseName: k8s-vm-hc
+          baseTemplateName: k8s-victoriametrics-healthcheck
+          levelOfDetail: basic
+          qualifiers:
+            - namespace
+            - cluster
+          outputItems:
+            - type: slx
+            - type: sli
+            # The runbook item is the only one that names its template file.
+            - templateName: k8s-victoriametrics-healthcheck-taskset.yaml
+              type: runbook
@@ -0,0 +1,48 @@
+# ServiceLevelIndicator template for the VictoriaMetrics health check.
+# The file is rendered by Jinja and then parsed as YAML, so every templated
+# scalar is quoted: the rendered value stays a string even when it looks like
+# a number or boolean (for example a ref of 1.10 or a namespace named 123).
+apiVersion: runwhen.com/v1
+kind: ServiceLevelIndicator
+metadata:
+  name: "{{slx_name}}"
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  displayUnitsLong: OK
+  displayUnitsShort: ok
+  locations:
+    - "{{default_location}}"
+  description: Scores VictoriaMetrics workload readiness and PVC binding for the namespace (0–1); use the runbook for deep triage.
+  codeBundle:
+    # repo_url and ref override the public code collection and main when set.
+    {% if repo_url %}
+    repoUrl: "{{repo_url}}"
+    {% else %}
+    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
+    {% endif %}
+    {% if ref %}
+    ref: "{{ref}}"
+    {% else %}
+    ref: main
+    {% endif %}
+    pathToRobot: codebundles/k8s-victoriametrics-healthcheck/sli.robot
+  intervalStrategy: intermezzo
+  intervalSeconds: 180
+  configProvided:
+    - name: KUBERNETES_DISTRIBUTION_BINARY
+      value: "{{custom.kubernetes_distribution_binary | default('kubectl')}}"
+    - name: NAMESPACE
+      value: "{{match_resource.resource.metadata.name}}"
+    - name: CONTEXT
+      value: "{{context}}"
+    - name: VM_LABEL_SELECTOR
+      value: "{{custom.vm_label_selector | default('')}}"
+  secretsProvided:
+  # With wb_version set the shared kubernetes-auth include supplies the
+  # secrets; otherwise a single kubeconfig workspace secret is referenced.
+  {% if wb_version %}
+    {% include "kubernetes-auth.yaml" ignore missing %}
+  {% else %}
+    - name: kubeconfig
+      workspaceKey: "{{custom.kubeconfig_secret_name | default('kubeconfig')}}"
+  {% endif %}
+  alertConfig:
+    tasks:
+      persona: eager-edgar
+      sessionTTL: 10m
@@ -0,0 +1,25 @@
+# ServiceLevelX template for the VictoriaMetrics health check.
+# Rendered by Jinja and then parsed as YAML; templated scalars are quoted so
+# that values such as an all-digit or boolean-looking namespace name remain
+# strings after rendering.
+apiVersion: runwhen.com/v1
+kind: ServiceLevelX
+metadata:
+  name: "{{slx_name}}"
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/kubernetes/resources/labeled/sts.svg
+  # Same expression as NAMESPACE below, so the alias and the configured
+  # namespace always refer to the same matched resource.
+  alias: "{{match_resource.resource.metadata.name}} VictoriaMetrics Health Check"
+  asMeasuredBy: Aggregate readiness of VictoriaMetrics pods and storage PVCs plus HTTP/cluster checks in the runbook.
+  configProvided:
+    - name: NAMESPACE
+      value: "{{match_resource.resource.metadata.name}}"
+  owners:
+    - "{{workspace.owner_email}}"
+  statement: VictoriaMetrics workloads in this namespace should be ready, storage bound, and serving healthy HTTP endpoints.
+  additionalContext:
+    {% include "kubernetes-hierarchy.yaml" ignore missing %}
+    qualified_name: "{{ match_resource.qualified_name }}"
+  tags:
+    {% include "kubernetes-tags.yaml" ignore missing %}
+    - name: access
+      value: read-only
@@ -0,0 +1,41 @@
+# Runbook template for the VictoriaMetrics health check.
+# The file is rendered by Jinja and then parsed as YAML, so every templated
+# scalar is quoted: the rendered value stays a string even when it looks like
+# a number or boolean (for example a ref of 1.10 or a namespace named 123).
+apiVersion: runwhen.com/v1
+kind: Runbook
+metadata:
+  name: "{{slx_name}}"
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  location: "{{default_location}}"
+  description: Validates VictoriaMetrics pods, PVCs, HTTP health, cluster status, and recent error logs in the namespace.
+  codeBundle:
+    # repo_url and ref override the public code collection and main when set.
+    {% if repo_url %}
+    repoUrl: "{{repo_url}}"
+    {% else %}
+    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
+    {% endif %}
+    {% if ref %}
+    ref: "{{ref}}"
+    {% else %}
+    ref: main
+    {% endif %}
+    pathToRobot: codebundles/k8s-victoriametrics-healthcheck/runbook.robot
+  configProvided:
+    - name: KUBERNETES_DISTRIBUTION_BINARY
+      value: "{{custom.kubernetes_distribution_binary | default('kubectl')}}"
+    - name: NAMESPACE
+      value: "{{match_resource.resource.metadata.name}}"
+    - name: CONTEXT
+      value: "{{context}}"
+    - name: VM_LABEL_SELECTOR
+      value: "{{custom.vm_label_selector | default('')}}"
+    - name: VM_DEPLOYMENT_MODE
+      value: "{{custom.vm_deployment_mode | default('auto')}}"
+  secretsProvided:
+  # With wb_version set the shared kubernetes-auth include supplies the
+  # secrets; otherwise a single kubeconfig workspace secret is referenced.
+  {% if wb_version %}
+    {% include "kubernetes-auth.yaml" ignore missing %}
+  {% else %}
+    - name: kubeconfig
+      workspaceKey: "{{custom.kubeconfig_secret_name | default('kubeconfig')}}"
+  {% endif %}
@@ -0,0 +1,146 @@
+version: "3"
+
+tasks:
+  # Default entry point: commit check, then config generation, then discovery.
+  default:
+    desc: 'Run/refresh config'
+    cmds:
+      - {task: check-unpushed-commits}
+      - {task: generate-rwl-config}
+      - {task: run-rwl-discovery}
+
+  # Tear-down: Kubernetes objects first, then SLXs, then local discovery output.
+  clean:
+    desc: 'Run cleanup tasks'
+    cmds:
+      - {task: remove-kubernetes-objects}
+      - {task: delete-slxs}
+      - {task: clean-rwl-discovery}
+
+  # Thin alias that delegates to create-kubernetes-objects.
+  build-infra:
+    desc: 'Build test infrastructure'
+    cmds:
+      - {task: create-kubernetes-objects}
+
+  # Applies every manifest in the kubernetes directory.
+  create-kubernetes-objects:
+    desc: "Apply manifests from kubernetes directory using kubectl"
+    cmds:
+      # Pass the directory itself: a shell glob expands to several arguments
+      # once more than one manifest exists, and -f only takes the first.
+      - kubectl apply -f kubernetes/
+    silent: true
+
+  # Deletes every object defined in the kubernetes directory (best effort).
+  remove-kubernetes-objects:
+    desc: "Delete kubernetes objects"
+    cmds:
+      # Pass the directory itself: a shell glob expands to several arguments
+      # once more than one manifest exists, and -f only takes the first.
+      # "|| true" keeps cleanup going when the objects are already gone.
+      - kubectl delete -f kubernetes/ || true
+    silent: true
+
+  # Exits 1 when files under BASE_DIR (excluding any .test directory) have
+  # uncommitted changes, or differ between HEAD and the branch on origin.
+  check-unpushed-commits:
+    desc: Check if outstanding commits or file updates need to be pushed before testing.
+    vars:
+      BASE_DIR: "../"
+    cmds:
+      - |
+        # BASE_DIR is a Task variable, not an environment variable, so it is
+        # injected through templating; a shell $BASE_DIR would be empty.
+        base_dir="{{.BASE_DIR}}"
+        echo "Checking for uncommitted changes in $base_dir and $base_dir.runwhen, excluding '.test'..."
+        # A pathspec restricts the diff to base_dir. git prints names relative
+        # to the repository root, so a "^../" prefix regex could never match.
+        UNCOMMITTED_FILES=$(git diff --name-only HEAD -- "$base_dir" | grep -v "/\.test/" || true)
+        if [ -n "$UNCOMMITTED_FILES" ]; then
+          echo "Uncommitted changes found:"
+          echo "$UNCOMMITTED_FILES"
+          exit 1
+        fi
+      - |
+        base_dir="{{.BASE_DIR}}"
+        git fetch origin 2>/dev/null || true
+        current_branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null)
+        # Errors (for example a branch that does not exist on origin) are
+        # silenced and treated as "nothing unpushed", as before.
+        UNPUSHED_FILES=$(git diff --name-only "origin/$current_branch" HEAD -- "$base_dir" 2>/dev/null | grep -v "/\.test/" || true)
+        if [ -n "$UNPUSHED_FILES" ]; then
+          echo "Unpushed commits found:"
+          echo "$UNPUSHED_FILES"
+          exit 1
+        fi
+    silent: true
+
+  # Writes workspaceInfo.yaml from the git remote URL, the current branch, the
+  # name of the parent directory and the Namespace in kubernetes/manifest.yaml.
+  generate-rwl-config:
+    desc: 'Generate RunWhen Local configuration (workspaceInfo.yaml)'
+    env:
+      RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}'
+    silent: true
+    cmds:
+      - |
+        # The CodeBundle name is the name of the working directory's parent.
+        bundle_name=$(basename "$(dirname "$PWD")")
+        target_ns=$(yq e -N 'select(.kind == "Namespace") | .metadata.name' kubernetes/manifest.yaml)
+        origin_url=$(git config --get remote.origin.url)
+        current_branch=$(git rev-parse --abbrev-ref HEAD)
+        cat > workspaceInfo.yaml <<EOF
+        workspaceName: "$RW_WORKSPACE"
+        workspaceOwnerEmail: authors@runwhen.com
+        defaultLocation: location-01
+        defaultLOD: none
+        cloudConfig:
+          kubernetes:
+            kubeconfigFile: /shared/kubeconfig
+            namespaceLODs:
+              $target_ns: detailed
+            namespaces:
+              - $target_ns
+        codeCollections:
+        - repoURL: "$origin_url"
+          branch: "$current_branch"
+          codeBundles: ["$bundle_name"]
+        custom:
+          kubeconfig_secret_name: "kubeconfig"
+          kubernetes_distribution_binary: kubectl
+        EOF
+
+  # Starts a fresh RunWhenLocal container with the working directory and the
+  # kubeconfig mounted, then runs the discovery script inside it.
+  # Requires RW_FROM_FILE to hold JSON with a "kubeconfig" file path.
+  run-rwl-discovery:
+    desc: "Run RunWhen Local Discovery on test infrastructure"
+    cmds:
+      - |
+        CONTAINER_NAME="RunWhenLocal"
+        # Validate the kubeconfig path before touching containers or output.
+        # jq prints "null" for a missing key, and docker would accept such a
+        # non-path value as a volume name instead of failing.
+        kubeconfig=$(echo "$RW_FROM_FILE" | jq -r .kubeconfig)
+        if [ -z "$kubeconfig" ] || [ "$kubeconfig" = "null" ]; then
+          echo "RW_FROM_FILE must be JSON containing a 'kubeconfig' file path"
+          exit 1
+        fi
+        # Remove any previous container, running or stopped.
+        if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then
+          docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME
+        elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then
+          docker rm $CONTAINER_NAME
+        fi
+        rm -rf output 2>/dev/null || true
+        mkdir -p output && chmod 777 output 2>/dev/null || true
+        docker run --name $CONTAINER_NAME -p 8081:8081 \
+          -v "$(pwd)":/shared \
+          -v "$kubeconfig":/shared/kubeconfig \
+          -d ghcr.io/runwhen-contrib/runwhen-local:latest || { echo "Failed to start container"; exit 1; }
+        docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { echo "Discovery script failed"; exit 1; }
+        echo "Review generated config under output/workspaces/"
+    silent: true
+
+  # Validates each generation rule file against the runwhen-local JSON schema.
+  # Exits non-zero when a required tool is missing, the schema cannot be
+  # downloaded, or at least one file fails validation.
+  validate-generation-rules:
+    desc: "Validate YAML files in .runwhen/generation-rules"
+    cmds:
+      - |
+        for cmd in curl yq ajv; do
+          command -v $cmd >/dev/null || { echo "Error: $cmd is required."; exit 1; }
+        done
+        temp_dir=$(mktemp -d)
+        schema_file="$temp_dir/generation-rule-schema.json"
+        # -f makes curl fail on an HTTP error instead of saving the error page
+        # as if it were the schema.
+        if ! curl -fsS -o "$schema_file" \
+          https://raw.githubusercontent.com/runwhen-contrib/runwhen-local/refs/heads/main/src/generation-rule-schema.json; then
+          echo "Error: failed to download the generation rule schema."
+          rm -rf "$temp_dir"
+          exit 1
+        fi
+        failures=0
+        for yaml_file in ../.runwhen/generation-rules/*.yaml; do
+          # An unmatched glob is left as the literal pattern; skip it.
+          [ -e "$yaml_file" ] || continue
+          echo "Validating $yaml_file"
+          json_file="$temp_dir/$(basename "${yaml_file%.*}.json")"
+          if yq -o=json "$yaml_file" > "$json_file" && \
+            ajv validate -s "$schema_file" -d "$json_file" --spec=draft2020 --strict=false; then
+            echo "$yaml_file is valid."
+          else
+            echo "$yaml_file is invalid."
+            failures=$((failures + 1))
+          fi
+        done
+        rm -rf "$temp_dir"
+        # Report the overall result through the exit status.
+        if [ "$failures" -gt 0 ]; then
+          echo "$failures generation rule file(s) failed validation."
+          exit 1
+        fi
+    silent: true
+
+  # Placeholder task: it only prints a hint. The env block passes
+  # RW_WORKSPACE, RW_API (as RW_API_URL) and RW_PAT to the command.
+  delete-slxs:
+    desc: "Delete SLX objects from RunWhen Platform (optional)"
+    env:
+      RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}'
+      RW_API_URL: "{{.RW_API}}"
+      RW_PAT: "{{.RW_PAT}}"
+    cmds:
+      # Quoted on purpose: unquoted, the ": " inside the message makes YAML
+      # read this entry as a mapping instead of a command string.
+      - 'echo "Optional: set RW_WORKSPACE RW_API RW_PAT to use platform delete"'
+    silent: true
+
+  # Removes the output directory and the generated workspaceInfo.yaml.
+  clean-rwl-discovery:
+    desc: 'Clean RunWhen Local discovery output'
+    silent: true
+    cmds:
+      - 'rm -rf output'
+      - 'rm -f workspaceInfo.yaml'
@@ -0,0 +1,28 @@
+# Namespace that holds the test fixture objects in this manifest.
+kind: Namespace
+apiVersion: v1
+metadata:
+  name: 'test-vm-health'
+
+---
+# Stand-in Deployment that carries VictoriaMetrics single-node labels.
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  namespace: test-vm-health
+  name: fake-victoria-metrics-single
+  labels:
+    app.kubernetes.io/component: single-binary
+    app.kubernetes.io/name: victoria-metrics-single
+spec:
+  replicas: 1
+  # The selector and the pod template agree on app: fake-vm.
+  selector:
+    matchLabels: {app: fake-vm}
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: victoria-metrics-single
+        app: fake-vm
+    spec:
+      containers:
+        # Only the pause image runs here; no VictoriaMetrics binary is present.
+        - image: registry.k8s.io/pause:3.9
+          name: pause
@@ -0,0 +1,62 @@
+# Kubernetes VictoriaMetrics Health Check
+
+This CodeBundle validates [VictoriaMetrics](https://docs.victoriametrics.com/) workloads on Kubernetes: operator-style pod readiness, vmstorage PVC health, in-pod HTTP `/health` probes, optional vmselect cluster status JSON, and error signatures in recent container logs. Use it once per namespace where VictoriaMetrics components run.
+
+## Overview
+
+- **Workload readiness**: Discovers Deployments, StatefulSets, DaemonSets, and pods that match common VictoriaMetrics labels (or an optional label selector) and reports CrashLoopBackOff, image pull failures, Pending pods, and rollout conditions that are not healthy.
+- **Storage**: Flags VM-related PVCs that are not `Bound` or show binding or resize problems.
+- **HTTP health**: Runs `kubectl exec` to hit `http://127.0.0.1:<port>/health` inside each running component pod using default ports (vmselect 8481, vminsert 8480, vmstorage 8482, single-node/vmagent 8429).
+- **Cluster status**: When `VM_DEPLOYMENT_MODE` is `cluster` or `auto` and a vmselect pod exists, fetches cluster status JSON from vmselect and surfaces degraded signals when the response suggests unhealthy storage or nodes.
+- **Logs**: Greps recent logs for ERROR, panic, or fatal patterns on VictoriaMetrics-labeled pods.
+- **SLI**: A lightweight `sli.robot` scores namespace health from VM pod readiness and VM-related PVC binding (0–1).
+
+## Configuration
+
+### Required variables
+
+- `CONTEXT`: Kubernetes context name to use.
+- `NAMESPACE`: Namespace where VictoriaMetrics workloads are deployed.
+
+### Optional variables
+
+- `KUBERNETES_DISTRIBUTION_BINARY`: `kubectl`-compatible CLI (default: `kubectl`).
+- `VM_LABEL_SELECTOR`: Optional Kubernetes label selector string (e.g. `app.kubernetes.io/instance=my-vm`) to narrow which pods and workloads are considered. If empty, the scripts use built-in VictoriaMetrics label and name heuristics.
+- `VM_DEPLOYMENT_MODE`: `single`, `cluster`, or `auto` (default: `auto`). Controls whether the vmselect cluster status task runs (`single` skips it; `auto` runs it when a vmselect pod is found).
+
+### Optional environment (scripts only)
+
+These are read by the bash scripts when set in the environment; they are not imported as Robot Framework variables:
+
+- `VM_LOG_TAIL_LINES`: Tail length for log scan (default: `120`).
+- `VM_LOG_SINCE`: `kubectl logs --since` window (default: `15m`).
+
+### Secrets
+
+- `kubeconfig`: Standard kubeconfig file for cluster access (same as other Kubernetes CodeBundles).
+
+## Tasks overview
+
+### Verify VictoriaMetrics workload pod readiness
+
+Correlates VictoriaMetrics-labeled controllers and pods with Ready status, waiting reasons, and workload conditions.
+
+### Check VictoriaMetrics storage PVCs
+
+Evaluates PVCs likely tied to VictoriaMetrics (name patterns, labels, and StatefulSet volume claim templates) for phases other than `Bound` and for failing conditions.
+
+### Probe VictoriaMetrics HTTP health endpoints
+
+Uses `kubectl exec` and `wget`/`curl` inside each running pod to call `/health` on the documented default port for that component.
+
+### Check VictoriaMetrics cluster status API (vmselect)
+
+When applicable, queries vmselect for JSON cluster status (tries `/api/v1/status/cluster` on port 8481) and raises issues when the API is unreachable or the payload suggests degraded storage.
+
+### Scan VictoriaMetrics recent logs for errors
+
+Collects recent container logs and matches error/panic/fatal signatures to surface runtime failures.
+
+### SLI (`sli.robot`)
+
+Computes a 0–1 score from VM workload readiness and VM-related PVC binding; sub-metrics `vm_readiness` and `vm_pvc` are published for drill-down.