Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
{ "package": "cffi==1.17.1", "type": "pip_module" },
{ "package": "prometheus_client==0.20.0", "type": "pip_module" },
{ "package": "kubernetes==33.1.0", "type": "pip_module" },
{ "package": "quay.io/strimzi/operator", "tag": "0.48.0", "type": "image" },
{ "package": "quay.io/strimzi/kafka", "tag": "0.48.0-kafka-4.1.0", "type": "image" },
{ "package": "quay.io/strimzi/operator", "tag": "1.0.1", "type": "image" },
{ "package": "quay.io/strimzi/kafka", "tag": "1.0.1-kafka-4.2.0", "type": "image" },
{ "package": "docker.io/dellhpcomniaaisolution/ubuntu-ldms", "tag": "1.1", "type": "image" },
{ "package": "quay.io/dell/container-storage-modules/csm-metrics-powerscale", "tag": "v1.12.0", "type": "image" },
{ "package": "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", "tag": "0.150.1", "type": "image" },
Expand All @@ -46,8 +46,8 @@
{ "package": "quay.io/jetstack/cert-manager-webhook", "tag": "v1.10.0", "type": "image" },
{ "package": "quay.io/jetstack/cert-manager-acmesolver", "tag": "v1.10.0", "type": "image" },
{ "package": "cert-manager-v1.10.0", "type": "tarball", "url": "https://charts.jetstack.io/charts/cert-manager-v1.10.0.tgz" },
{ "package": "strimzi-kafka-operator-helm-3-chart-0.48.0", "type": "tarball", "url": "https://github.com/strimzi/strimzi-kafka-operator/releases/download/0.48.0/strimzi-kafka-operator-helm-3-chart-0.48.0.tgz" },
{ "package": "quay.io/strimzi/kafka-bridge", "tag": "0.33.1", "type": "image" },
{ "package": "strimzi-kafka-operator-helm-3-chart-1.0.1", "type": "tarball", "url": "https://github.com/strimzi/strimzi-kafka-operator/releases/download/1.0.1/strimzi-kafka-operator-helm-3-chart-1.0.1.tgz" },
{ "package": "quay.io/strimzi/kafka-bridge", "tag": "1.0.0", "type": "image" },
{ "package": "docker.io/victoriametrics/operator", "tag": "v0.68.3", "type": "image" },
{ "package": "docker.io/victoriametrics/operator", "tag": "config-reloader-v0.68.3", "type": "image" },
{ "package": "victoria-metrics-operator-0.59.3", "type": "tarball", "url": "https://github.com/VictoriaMetrics/helm-charts/releases/download/victoria-metrics-operator-0.59.3/victoria-metrics-operator-0.59.3.tgz" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ server4:
# -------------------------------------------------------------------
# Multi-subnet configuration (requires coresmd v0.6.x+)
# To enable multi-subnet DHCP:
# 1. Pull the new coresmd image: podman pull ghcr.io/openchami/coresmd:v0.6.x
# 1. Pull the new coresmd image: podman pull ghcr.io/openchami/coresmd:v0.6.3
# 2. Comment out the single-subnet coresmd and bootloop lines above
# 3. Uncomment the multi-subnet coresmd and bootloop blocks below
# 4. In /etc/containers/systemd/coresmd-coredhcp.container and /etc/containers/systemd/coresmd-coredns.container, replace the old coresmd image version with the new one
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: kafka.strimzi.io/v1beta2
apiVersion: kafka.strimzi.io/v1
kind: KafkaNodePool
metadata:
name: controller
Expand All @@ -19,7 +19,7 @@ spec:
deleteClaim: false
---

apiVersion: kafka.strimzi.io/v1beta2
apiVersion: kafka.strimzi.io/v1
kind: KafkaNodePool
metadata:
name: broker
Expand All @@ -40,7 +40,7 @@ spec:
deleteClaim: false
---

apiVersion: kafka.strimzi.io/v1beta2
apiVersion: kafka.strimzi.io/v1
kind: Kafka
metadata:
name: kafka
Expand All @@ -50,8 +50,8 @@ metadata:
strimzi.io/kraft: enabled
spec:
kafka:
version: 4.1.0
metadataVersion: 4.1-IV0
version: 4.2.0
metadataVersion: 4.2-IV0
listeners:
- name: internal
port: 9092
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
---
apiVersion: kafka.strimzi.io/v1beta2
apiVersion: kafka.strimzi.io/v1
kind: KafkaBridge
metadata:
name: bridge
namespace: telemetry
spec:
bootstrapServers: kafka-kafka-bootstrap:9093
enableMetrics: true
http:
port: 8080
# Enable TLS for Kafka connection
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

---
apiVersion: kafka.strimzi.io/v1beta2
apiVersion: kafka.strimzi.io/v1
kind: KafkaUser
metadata:
name: kafkapump
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: kafka.strimzi.io/v1beta2
apiVersion: kafka.strimzi.io/v1
kind: KafkaTopic
metadata:
name: {{ topic_name }}
Expand Down
9 changes: 9 additions & 0 deletions provision/roles/telemetry/templates/telemetry/telemetry.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ else
helm -n telemetry install strimzi-cluster-operator "${DEPLOY_DIR}/{{ strimzi_kafka_pkg }}.tar.gz"
fi

# Helm 3 does NOT update CRDs on 'helm upgrade'. Explicitly apply CRDs
# from the chart tarball so that new API versions are registered before
# kubectl apply -k attempts to create Kafka resources.
echo " Applying Strimzi CRDs from chart (Helm 3 does not update CRDs on upgrade)..."
_STRIMZI_CRD_TMP=$(mktemp -d)
tar -xzf "${DEPLOY_DIR}/{{ strimzi_kafka_pkg }}.tar.gz" -C "$_STRIMZI_CRD_TMP"
kubectl apply -f "$_STRIMZI_CRD_TMP/strimzi-kafka-operator/crds/" --server-side --force-conflicts
rm -rf "$_STRIMZI_CRD_TMP"

# Wait for Strimzi operator to be ready
echo " Waiting for Strimzi operator deployment..."
kubectl wait --for=condition=available --timeout=300s deployment/strimzi-cluster-operator -n telemetry
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# OME is an external producer with a different security domain, so it gets a
# dedicated, least-privilege KafkaUser.

apiVersion: kafka.strimzi.io/v1beta2
apiVersion: kafka.strimzi.io/v1
kind: KafkaUser
metadata:
name: {{ vector.ome.kafka_user }}
Expand Down
6 changes: 3 additions & 3 deletions provision/roles/telemetry/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ kafka:
lb_service_name: "kafka-loadbalancer"
container_port1: 9093
# Kafka images from service_k8s_v<version>.json
operator_image: "{{ telemetry_images['strimzi/operator'] | default('quay.io/strimzi/operator:0.48.0') }}"
kafka_image: "{{ telemetry_images['strimzi/kafka'] | default('quay.io/strimzi/kafka:0.48.0-kafka-4.1.0') }}"
bridge_image: "{{ telemetry_images['strimzi/kafka-bridge'] | default('quay.io/strimzi/kafka-bridge:0.33.1') }}"
operator_image: "{{ telemetry_images['strimzi/operator'] | default('quay.io/strimzi/operator:1.0.1') }}"
kafka_image: "{{ telemetry_images['strimzi/kafka'] | default('quay.io/strimzi/kafka:1.0.1-kafka-4.2.0') }}"
bridge_image: "{{ telemetry_images['strimzi/kafka-bridge'] | default('quay.io/strimzi/kafka-bridge:1.0.0') }}"
container_port2: 9093
image: "apache/kafka:4.1.0"
cluster_id: "kafka-cluster-id"
Expand Down
103 changes: 103 additions & 0 deletions upgrade/roles/upgrade_telemetry/files/migrate_strimzi_crds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/bin/bash
# migrate_strimzi_crds.sh — Strimzi CRD major version migration
#
# Handles the upgrade from Strimzi 0.x (v1beta2) to 1.x (v1-only).
# Strimzi 1.0.x completely dropped the v1beta2 API. Kubernetes
# cannot remove a served version when objects stored in that version
# still exist in etcd. This script:
#
#   1. Detects whether migration is needed
#   2. Temporarily re-enables v1beta2 on CRDs so stuck CRs are readable
#   3. Deletes existing Kafka CRs (they will be recreated by telemetry.sh)
#   4. Deletes old PVCs (new cluster ID makes old data incompatible)
#   5. Deletes the kafka-cluster-id secret (operator will regenerate)
#   6. Removes CRDs (handles stuck cleanup finalizers)
#
# WARNING: once migration is detected, phases 3-6 are destructive. Every
# Strimzi custom resource in the namespace, every PVC labelled
# strimzi.io/cluster=kafka and all Strimzi CRDs are deleted.
#
# telemetry.sh then recreates CRDs + CRs from the new chart.
# This script is fully idempotent — it is a no-op when CRDs are
# already healthy, absent, or running v1 without issues.
#
# Requires: kubectl; jq (only for phase 2 — a warning is printed and the
# phase is skipped when jq is unavailable).
#
# Usage: bash migrate_strimzi_crds.sh <namespace>
#   <namespace>  namespace holding the Kafka resources (default: telemetry)
# Exit codes: 0 = success or no migration needed
#   Individual delete/patch steps are best-effort; their failures do not
#   change the exit code. A warning goes to stderr if CRDs are still
#   present after the final wait.

set -euo pipefail

NS="${1:-telemetry}"

# ── Phase 1: Detect ─────────────────────────────────────────────
needs_migration=false

# Check if any Strimzi CRD still lists v1beta2 in storedVersions
for crd in $(kubectl get crd -o name 2>/dev/null | grep -E '\.kafka\.strimzi\.io|\.core\.strimzi\.io'); do
    if kubectl get "$crd" -o jsonpath='{.status.storedVersions}' 2>/dev/null | grep -q 'v1beta2'; then
        echo "[MIGRATE] $crd has v1beta2 in storedVersions"
        needs_migration=true
        break
    fi
done

# Check if CRs are stuck (v1-only CRDs but objects stored as v1beta2)
if [ "$needs_migration" = "false" ] && kubectl get crd kafkas.kafka.strimzi.io >/dev/null 2>&1; then
    if kubectl get kafka -n "$NS" 2>&1 | grep -q 'convert CR from an invalid group/version'; then
        echo "[MIGRATE] CRs stuck — conversion error detected"
        needs_migration=true
    fi
fi

if [ "$needs_migration" = "false" ]; then
    echo "[MIGRATE] No Strimzi CRD migration needed."
    exit 0
fi

echo "[MIGRATE] Starting Strimzi CRD migration (v1beta2 → v1)..."

# ── Phase 2: Make stuck CRs readable ────────────────────────────
# Bare CRD names (resource-type prefix stripped) so they can be passed
# straight to 'kubectl get crd' / 'kubectl delete crd'.
STRIMZI_CRDS=$(kubectl get crd -o name 2>/dev/null \
    | grep -E '\.kafka\.strimzi\.io|\.core\.strimzi\.io' \
    | sed 's|customresourcedefinition.apiextensions.k8s.io/||')

if [ -n "$STRIMZI_CRDS" ]; then
    if ! command -v jq >/dev/null 2>&1; then
        echo "[MIGRATE] WARNING: jq not found; cannot re-enable v1beta2 on CRDs (continuing)" >&2
    else
        echo "[MIGRATE] Temporarily adding v1beta2 to CRDs..."
        for crd in $STRIMZI_CRDS; do
            crd_json=$(kubectl get crd "$crd" -o json 2>/dev/null) || continue
            # A CRD that already defines v1beta2 needs no patch; appending a
            # second entry with the same version name would only be rejected.
            if jq -e 'any(.spec.versions[]; .name == "v1beta2")' <<<"$crd_json" >/dev/null 2>&1; then
                continue
            fi
            # Clone the first version entry as a served, non-storage v1beta2.
            jq '.spec.versions += [(.spec.versions[0] | .name = "v1beta2" | .served = true | .storage = false)]' <<<"$crd_json" \
                | kubectl apply -f - --server-side --force-conflicts >/dev/null 2>&1 \
                || echo "[MIGRATE] WARNING: could not add v1beta2 to $crd (continuing)" >&2
        done
    fi
fi

# ── Phase 3: Delete existing CRs ────────────────────────────────
echo "[MIGRATE] Deleting existing Kafka CRs..."
for kind in kafka kafkanodepool kafkabridge kafkatopic kafkauser strimzipodset; do
    for item in $(kubectl get "$kind" -n "$NS" -o name 2>/dev/null); do
        # Clear finalizers first so the delete cannot hang on them.
        kubectl patch "$item" -n "$NS" --type=merge \
            -p '{"metadata":{"finalizers":[]}}' 2>/dev/null || true
        kubectl delete "$item" -n "$NS" --wait=false 2>/dev/null || true
    done
done
sleep 5

# ── Phase 4: Delete old Kafka PVCs ───────────────────────────────
echo "[MIGRATE] Deleting old Kafka PVCs (new cluster ID makes old data incompatible)..."
kubectl delete pvc -n "$NS" -l strimzi.io/cluster=kafka --wait=false 2>/dev/null || true

# ── Phase 5: Delete cluster-id secret (operator will regenerate) ─
kubectl delete secret kafka-cluster-id -n "$NS" 2>/dev/null || true

# ── Phase 6: Delete CRDs ────────────────────────────────────────
if [ -n "$STRIMZI_CRDS" ]; then
    echo "[MIGRATE] Deleting Strimzi CRDs..."
    # $STRIMZI_CRDS is intentionally unquoted: one argument per CRD name.
    kubectl delete crd $STRIMZI_CRDS --wait=false --timeout=30s 2>&1 || true
    sleep 5
    # Remove cleanup finalizers from any CRDs stuck in Terminating
    for crd in $(kubectl get crd -o name 2>/dev/null | grep -E '\.strimzi\.io'); do
        kubectl patch "$crd" --type=merge \
            -p '{"metadata":{"finalizers":[]}}' 2>/dev/null || true
    done
    # Wait for CRDs to fully disappear (up to 24 x 5s = 120s)
    remaining=0
    for _ in $(seq 1 24); do
        # 'grep -c' prints "0" AND exits non-zero when nothing matches, so
        # use '|| true' here: '|| echo 0' would capture "0<newline>0", which
        # is not an integer and would keep the loop from ever breaking early.
        remaining=$(kubectl get crd -o name 2>/dev/null | grep -cE '\.strimzi\.io' || true)
        remaining=${remaining:-0}
        if [ "$remaining" -eq 0 ]; then
            break
        fi
        sleep 5
    done
    if [ "$remaining" -ne 0 ]; then
        echo "[MIGRATE] WARNING: $remaining Strimzi CRD(s) still present after waiting; telemetry.sh may fail to recreate them" >&2
    fi
fi

echo "[MIGRATE] Strimzi CRD migration complete. telemetry.sh will recreate CRDs and CRs."
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
- name: Reclaim preserved IPs from conflicting services
when:
- preserved_vminsert_ip | default('') | length > 0 or preserved_vmselect_ip | default('') | length > 0
- vminsert_lb_ip.stdout | trim | length == 0 or vmselect_lb_ip.stdout | trim | length == 0
- (vminsert_lb_ip.stdout | default('') | trim | length == 0) or (vmselect_lb_ip.stdout | default('') | trim | length == 0)
block:
- name: Stage IP conflict detection script
ansible.builtin.template:
Expand Down
16 changes: 16 additions & 0 deletions upgrade/roles/upgrade_telemetry/tasks/execute_telemetry_sh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,22 @@
ansible.builtin.debug:
msg: "{{ pods_before_upgrade.stdout_lines }}"

# ── Pre-CRD migration: Strimzi major version upgrade (0.x → 1.x) ──
# See files/migrate_strimzi_crds.sh for full details.
# The script is idempotent — no-op when CRDs are already healthy or absent.
# Runs the role's files/migrate_strimzi_crds.sh on the kube_vip host over SSH,
# passing the telemetry namespace as its only argument. The task reports
# "changed" only when the script's stdout shows a migration actually started.
- name: Run Strimzi CRD migration if needed (v1beta2 → v1)
  delegate_to: "{{ kube_vip }}"
  connection: ssh
  ansible.builtin.script:
    cmd: "migrate_strimzi_crds.sh {{ telemetry_namespace }}"
  register: strimzi_migration_result
  failed_when: strimzi_migration_result.rc != 0
  changed_when: >-
    'Starting Strimzi CRD migration' in strimzi_migration_result.stdout

# Echo the migration script's stdout into the play log.
# The script task is skipped in check mode (it sets no creates/removes), so
# the registered result then has no stdout_lines; fall back to an empty list
# instead of failing on an undefined attribute.
- name: Display Strimzi migration result
  ansible.builtin.debug:
    msg: "{{ strimzi_migration_result.stdout_lines | default([]) }}"

# ── Execute telemetry.sh ──
- name: Execute telemetry.sh on kube_vip
ansible.builtin.command:
Expand Down
Loading