diff --git a/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json b/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json index 8f21b2bf51..8c18422fcc 100644 --- a/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json +++ b/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json @@ -33,8 +33,8 @@ { "package": "cffi==1.17.1", "type": "pip_module" }, { "package": "prometheus_client==0.20.0", "type": "pip_module" }, { "package": "kubernetes==33.1.0", "type": "pip_module" }, - { "package": "quay.io/strimzi/operator", "tag": "0.48.0", "type": "image" }, - { "package": "quay.io/strimzi/kafka", "tag": "0.48.0-kafka-4.1.0", "type": "image" }, + { "package": "quay.io/strimzi/operator", "tag": "1.0.1", "type": "image" }, + { "package": "quay.io/strimzi/kafka", "tag": "1.0.1-kafka-4.2.0", "type": "image" }, { "package": "docker.io/dellhpcomniaaisolution/ubuntu-ldms", "tag": "1.1", "type": "image" }, { "package": "quay.io/dell/container-storage-modules/csm-metrics-powerscale", "tag": "v1.12.0", "type": "image" }, { "package": "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", "tag": "0.150.1", "type": "image" }, @@ -46,8 +46,8 @@ { "package": "quay.io/jetstack/cert-manager-webhook", "tag": "v1.10.0", "type": "image" }, { "package": "quay.io/jetstack/cert-manager-acmesolver", "tag": "v1.10.0", "type": "image" }, { "package": "cert-manager-v1.10.0", "type": "tarball", "url": "https://charts.jetstack.io/charts/cert-manager-v1.10.0.tgz" }, - { "package": "strimzi-kafka-operator-helm-3-chart-0.48.0", "type": "tarball", "url": "https://github.com/strimzi/strimzi-kafka-operator/releases/download/0.48.0/strimzi-kafka-operator-helm-3-chart-0.48.0.tgz" }, - { "package": "quay.io/strimzi/kafka-bridge", "tag": "0.33.1", "type": "image" }, + { "package": "strimzi-kafka-operator-helm-3-chart-1.0.1", "type": "tarball", "url": "https://github.com/strimzi/strimzi-kafka-operator/releases/download/1.0.1/strimzi-kafka-operator-helm-3-chart-1.0.1.tgz" }, + { 
"package": "quay.io/strimzi/kafka-bridge", "tag": "1.0.0", "type": "image" }, { "package": "docker.io/victoriametrics/operator", "tag": "v0.68.3", "type": "image" }, { "package": "docker.io/victoriametrics/operator", "tag": "config-reloader-v0.68.3", "type": "image" }, { "package": "victoria-metrics-operator-0.59.3", "type": "tarball", "url": "https://github.com/VictoriaMetrics/helm-charts/releases/download/victoria-metrics-operator-0.59.3/victoria-metrics-operator-0.59.3.tgz" }, diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 index edfcb8c583..1cf09db3f9 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 +++ b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 @@ -41,7 +41,7 @@ server4: # ------------------------------------------------------------------- # Multi-subnet configuration (requires coresmd v0.6.x+) # To enable multi-subnet DHCP: - # 1. Pull the new coresmd image: podman pull ghcr.io/openchami/coresmd:v0.6.x + # 1. Pull the new coresmd image: podman pull ghcr.io/openchami/coresmd:v0.6.3 # 2. Comment out the single-subnet coresmd and bootloop lines above # 3. Uncomment the multi-subnet coresmd and bootloop blocks below # 4. 
Replace the new coresmd image version in files: /etc/containers/systemd/coresmd-coredhcp.container /etc/containers/systemd/coresmd-coredns.container with the old version diff --git a/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka.yaml.j2 b/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka.yaml.j2 index 929af037c4..a9a0f6196f 100644 --- a/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka.yaml.j2 +++ b/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka.yaml.j2 @@ -1,4 +1,4 @@ -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/v1 kind: KafkaNodePool metadata: name: controller @@ -19,7 +19,7 @@ spec: deleteClaim: false --- -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/v1 kind: KafkaNodePool metadata: name: broker @@ -40,7 +40,7 @@ spec: deleteClaim: false --- -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/v1 kind: Kafka metadata: name: kafka @@ -50,8 +50,8 @@ metadata: strimzi.io/kraft: enabled spec: kafka: - version: 4.1.0 - metadataVersion: 4.1-IV0 + version: 4.2.0 + metadataVersion: 4.2-IV0 listeners: - name: internal port: 9092 diff --git a/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka_bridge.yaml.j2 b/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka_bridge.yaml.j2 index 35a0862cf3..b4fdb5689c 100644 --- a/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka_bridge.yaml.j2 +++ b/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafka_bridge.yaml.j2 @@ -1,12 +1,11 @@ --- -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/v1 kind: KafkaBridge metadata: name: bridge namespace: telemetry spec: bootstrapServers: kafka-kafka-bootstrap:9093 - enableMetrics: true http: port: 8080 # Enable TLS for Kafka connection diff --git a/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafkapump_user.yaml.j2 b/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafkapump_user.yaml.j2 
index 413a7fe72d..df1b9f9bae 100644 --- a/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafkapump_user.yaml.j2 +++ b/provision/roles/telemetry/templates/telemetry/kafka/kafka.kafkapump_user.yaml.j2 @@ -13,7 +13,7 @@ # limitations under the License. --- -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/v1 kind: KafkaUser metadata: name: kafkapump diff --git a/provision/roles/telemetry/templates/telemetry/kafka/kafka.topic.yaml.j2 b/provision/roles/telemetry/templates/telemetry/kafka/kafka.topic.yaml.j2 index 9ae180ecfd..974712e78f 100644 --- a/provision/roles/telemetry/templates/telemetry/kafka/kafka.topic.yaml.j2 +++ b/provision/roles/telemetry/templates/telemetry/kafka/kafka.topic.yaml.j2 @@ -1,4 +1,4 @@ -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/v1 kind: KafkaTopic metadata: name: {{ topic_name }} diff --git a/provision/roles/telemetry/templates/telemetry/telemetry.sh.j2 b/provision/roles/telemetry/templates/telemetry/telemetry.sh.j2 index c711863f48..8321e8e61d 100644 --- a/provision/roles/telemetry/templates/telemetry/telemetry.sh.j2 +++ b/provision/roles/telemetry/templates/telemetry/telemetry.sh.j2 @@ -19,6 +19,15 @@ else helm -n telemetry install strimzi-cluster-operator "${DEPLOY_DIR}/{{ strimzi_kafka_pkg }}.tar.gz" fi +# Helm 3 does NOT update CRDs on 'helm upgrade', so the CRDs shipped in the +# chart tarball are unpacked into a temp dir and applied explicitly; this registers +# new API versions before kubectl apply -k attempts to create Kafka resources. +echo " Applying Strimzi CRDs from chart (Helm 3 does not update CRDs on upgrade)..." +_STRIMZI_CRD_TMP=$(mktemp -d) +tar -xzf "${DEPLOY_DIR}/{{ strimzi_kafka_pkg }}.tar.gz" -C "$_STRIMZI_CRD_TMP" +kubectl apply -f "$_STRIMZI_CRD_TMP/strimzi-kafka-operator/crds/" --server-side --force-conflicts +rm -rf "$_STRIMZI_CRD_TMP" + # Wait for Strimzi operator to be ready echo " Waiting for Strimzi operator deployment..." 
kubectl wait --for=condition=available --timeout=300s deployment/strimzi-cluster-operator -n telemetry diff --git a/provision/roles/telemetry/templates/telemetry/vector/vector-ome-kafkauser.yaml.j2 b/provision/roles/telemetry/templates/telemetry/vector/vector-ome-kafkauser.yaml.j2 index 2cad636058..28e69319f3 100644 --- a/provision/roles/telemetry/templates/telemetry/vector/vector-ome-kafkauser.yaml.j2 +++ b/provision/roles/telemetry/templates/telemetry/vector/vector-ome-kafkauser.yaml.j2 @@ -9,7 +9,7 @@ # OME is an external producer with a different security domain, so it gets a # dedicated, least-privilege KafkaUser. -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/v1 kind: KafkaUser metadata: name: {{ vector.ome.kafka_user }} diff --git a/provision/roles/telemetry/vars/main.yml b/provision/roles/telemetry/vars/main.yml index 4f1f29f9fa..1337efb093 100644 --- a/provision/roles/telemetry/vars/main.yml +++ b/provision/roles/telemetry/vars/main.yml @@ -116,9 +116,9 @@ kafka: lb_service_name: "kafka-loadbalancer" container_port1: 9093 # Kafka images from service_k8s_v.json - operator_image: "{{ telemetry_images['strimzi/operator'] | default('quay.io/strimzi/operator:0.48.0') }}" - kafka_image: "{{ telemetry_images['strimzi/kafka'] | default('quay.io/strimzi/kafka:0.48.0-kafka-4.1.0') }}" - bridge_image: "{{ telemetry_images['strimzi/kafka-bridge'] | default('quay.io/strimzi/kafka-bridge:0.33.1') }}" + operator_image: "{{ telemetry_images['strimzi/operator'] | default('quay.io/strimzi/operator:1.0.1') }}" + kafka_image: "{{ telemetry_images['strimzi/kafka'] | default('quay.io/strimzi/kafka:1.0.1-kafka-4.2.0') }}" + bridge_image: "{{ telemetry_images['strimzi/kafka-bridge'] | default('quay.io/strimzi/kafka-bridge:1.0.0') }}" container_port2: 9093 image: "apache/kafka:4.1.0" cluster_id: "kafka-cluster-id" diff --git a/upgrade/roles/upgrade_telemetry/files/migrate_strimzi_crds.sh b/upgrade/roles/upgrade_telemetry/files/migrate_strimzi_crds.sh new file 
mode 100644 index 0000000000..c9c98e828b --- /dev/null +++ b/upgrade/roles/upgrade_telemetry/files/migrate_strimzi_crds.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# migrate_strimzi_crds.sh — Strimzi CRD major version migration +# +# Handles the upgrade from Strimzi 0.x (v1beta2) to 1.x (v1-only). +# Strimzi 1.0.x completely dropped the v1beta2 API. Kubernetes +# cannot remove a served version when objects stored in that version +# still exist in etcd. This script: +# +# 1. Detects whether migration is needed +# 2. Temporarily re-enables v1beta2 on CRDs so stuck CRs are readable +# 3. Deletes existing Kafka CRs (they will be recreated by telemetry.sh) +# 4. Deletes old PVCs and the kafka-cluster-id secret (new cluster ID makes old data incompatible) +# 5. Removes CRDs (handles stuck cleanup finalizers) +# +# telemetry.sh then recreates CRDs + CRs from the new chart. +# This script is fully idempotent — it is a no-op when CRDs are +# already healthy, absent, or running v1 without issues. +# +# Usage: bash migrate_strimzi_crds.sh [namespace] (namespace defaults to "telemetry") +# Exit codes: 0 = success or no migration needed; non-zero = an unguarded command failed (set -euo pipefail) + +set -euo pipefail + +NS="${1:-telemetry}" + +# ── Phase 1: Detect ───────────────────────────────────────────── +needs_migration=false + +# Check if any Strimzi CRD still lists v1beta2 in storedVersions +for crd in $(kubectl get crd -o name 2>/dev/null | grep -E '\.kafka\.strimzi\.io|\.core\.strimzi\.io'); do + if kubectl get "$crd" -o jsonpath='{.status.storedVersions}' 2>/dev/null | grep -q 'v1beta2'; then + echo "[MIGRATE] $crd has v1beta2 in storedVersions" + needs_migration=true + break + fi +done + +# Check if CRs are stuck (v1-only CRDs but objects stored as v1beta2) +if [ "$needs_migration" = "false" ] && kubectl get crd kafkas.kafka.strimzi.io >/dev/null 2>&1; then + if kubectl get kafka -n "$NS" 2>&1 | grep -q 'convert CR from an invalid group/version'; then + echo "[MIGRATE] CRs stuck — conversion error detected" + needs_migration=true + fi +fi + +if [ "$needs_migration" = "false" ]; then + echo "[MIGRATE] No Strimzi 
CRD migration needed." + exit 0 +fi + +echo "[MIGRATE] Starting Strimzi CRD migration (v1beta2 → v1)..." + +# ── Phase 2: Make stuck CRs readable ──────────────────────────── +STRIMZI_CRDS=$(kubectl get crd -o name 2>/dev/null \ + | grep -E '\.kafka\.strimzi\.io|\.core\.strimzi\.io' \ + | sed 's|customresourcedefinition.apiextensions.k8s.io/||') + +if [ -n "$STRIMZI_CRDS" ]; then + echo "[MIGRATE] Temporarily adding v1beta2 to CRDs..." + for crd in $STRIMZI_CRDS; do + kubectl get crd "$crd" -o json 2>/dev/null \ + | jq '.spec.versions += [(.spec.versions[0] | .name = "v1beta2" | .served = true | .storage = false)]' \ + | kubectl apply -f - --server-side --force-conflicts >/dev/null 2>&1 || true + done +fi + +# ── Phase 3: Delete existing CRs ──────────────────────────────── +echo "[MIGRATE] Deleting existing Kafka CRs..." +for kind in kafka kafkanodepool kafkabridge kafkatopic kafkauser strimzipodset; do + for item in $(kubectl get "$kind" -n "$NS" -o name 2>/dev/null); do + kubectl patch "$item" -n "$NS" --type=merge \ + -p '{"metadata":{"finalizers":[]}}' 2>/dev/null || true + kubectl delete "$item" -n "$NS" --wait=false 2>/dev/null || true + done +done +sleep 5 + +# ── Phase 4: Delete old Kafka PVCs ─────────────────────────────── +echo "[MIGRATE] Deleting old Kafka PVCs (new cluster ID makes old data incompatible)..." +kubectl delete pvc -n "$NS" -l strimzi.io/cluster=kafka --wait=false 2>/dev/null || true + +# ── Phase 5: Delete cluster-id secret (operator will regenerate) ─ +kubectl delete secret kafka-cluster-id -n "$NS" 2>/dev/null || true + +# ── Phase 6: Delete CRDs ──────────────────────────────────────── +if [ -n "$STRIMZI_CRDS" ]; then + echo "[MIGRATE] Deleting Strimzi CRDs..." 
+ kubectl delete crd $STRIMZI_CRDS --wait=false --timeout=30s 2>&1 || true + sleep 5 + # Remove cleanup finalizers from any CRDs stuck in Terminating + for crd in $(kubectl get crd -o name 2>/dev/null | grep -E '\.strimzi\.io'); do + kubectl patch "$crd" --type=merge \ + -p '{"metadata":{"finalizers":[]}}' 2>/dev/null || true + done + # Wait up to 120s (24 x 5s) for CRDs to fully disappear; grep -c already prints 0 on no match, so only its exit status is masked + for i in $(seq 1 24); do + remaining=$(kubectl get crd -o name 2>/dev/null | grep -cE '\.strimzi\.io' || true) + [ "$remaining" -eq 0 ] 2>/dev/null && break + sleep 5 + done +fi + +echo "[MIGRATE] Strimzi CRD migration complete. telemetry.sh will recreate CRDs and CRs." diff --git a/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml b/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml index 1e8b7aa3f0..085801b9ab 100644 --- a/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml +++ b/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml @@ -150,7 +150,7 @@ - name: Reclaim preserved IPs from conflicting services when: - preserved_vminsert_ip | default('') | length > 0 or preserved_vmselect_ip | default('') | length > 0 - - vminsert_lb_ip.stdout | trim | length == 0 or vmselect_lb_ip.stdout | trim | length == 0 + - (vminsert_lb_ip.stdout | default('') | trim | length == 0) or (vmselect_lb_ip.stdout | default('') | trim | length == 0) block: - name: Stage IP conflict detection script ansible.builtin.template: diff --git a/upgrade/roles/upgrade_telemetry/tasks/execute_telemetry_sh.yml b/upgrade/roles/upgrade_telemetry/tasks/execute_telemetry_sh.yml index 1dac883990..54a2ae922f 100644 --- a/upgrade/roles/upgrade_telemetry/tasks/execute_telemetry_sh.yml +++ b/upgrade/roles/upgrade_telemetry/tasks/execute_telemetry_sh.yml @@ -67,6 +67,22 @@ ansible.builtin.debug: msg: "{{ pods_before_upgrade.stdout_lines }}" + # ── Pre-CRD migration: Strimzi major version upgrade (0.x → 1.x) ── + # See files/migrate_strimzi_crds.sh for full details. 
+ # The script is idempotent — it exits 0 without touching the cluster when no migration is needed; changed_when keys off its "Starting Strimzi CRD migration" banner. + - name: Run Strimzi CRD migration if needed (v1beta2 → v1) + ansible.builtin.script: + cmd: migrate_strimzi_crds.sh {{ telemetry_namespace }} + delegate_to: "{{ kube_vip }}" + connection: ssh + register: strimzi_migration_result + changed_when: "'Starting Strimzi CRD migration' in strimzi_migration_result.stdout" + failed_when: strimzi_migration_result.rc != 0 + + - name: Display Strimzi migration result + ansible.builtin.debug: + msg: "{{ strimzi_migration_result.stdout_lines }}" + # ── Execute telemetry.sh ── - name: Execute telemetry.sh on kube_vip ansible.builtin.command: