From e61200b2da5f7f0f5f83c7fa9e855c34845843d8 Mon Sep 17 00:00:00 2001 From: priti-parate <140157516+priti-parate@users.noreply.github.com> Date: Sun, 14 Jun 2026 08:27:53 +0530 Subject: [PATCH] Powerscale telemetry version upgrade and preserving loadbalancer IP for victoria (#4743) * upgrade defect fixes and fix for CrashLoopBackOff on pod restart Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * remove stale services and deployments for victoria Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert changes as it is taken care of in another PR Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert idrac terminationgraceperiod Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * ansible lint fixes Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * rescue block for upgrade telemetry Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert upgrade telemetry Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * default size of idrac telemetry containers Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * add new line Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * update values in upgrade path Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * updating values as integers instead of decimals Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert service k8s json file Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * powerscale telemetry upgrade and preserve loadbalancer IP for Victoria Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * powerscale telemetry version 
upgrade Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * ansible lint fixes Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * update software_config with updated csi driver version Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * upgrade powerscale values.yml Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert kafka patch variables Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * update delegation as mount_on_oim can be false also Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * update vars Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> --------- Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> --- .../x86_64/rhel/10.0/service_k8s_v1.35.1.json | 1 + .../scripts/merge_powerscale_values.py | 110 +++++++++++------- .../scripts/transform_software_config.py | 2 +- .../tasks/apply_victoria_crs.yml | 73 ++++++++++++ .../tasks/backup_telemetry.yml | 22 ++-- .../tasks/backup_victoria.yml | 2 +- .../tasks/migrate_statefulset.yml | 80 +++++++++++++ upgrade/roles/upgrade_telemetry/vars/main.yml | 16 ++- 8 files changed, 246 insertions(+), 60 deletions(-) diff --git a/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json b/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json index af9476e8ea..8f21b2bf51 100644 --- a/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json +++ b/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json @@ -1,3 +1,4 @@ + { "service_k8s": { "cluster": [ diff --git a/upgrade/roles/import_input_parameters/scripts/merge_powerscale_values.py b/upgrade/roles/import_input_parameters/scripts/merge_powerscale_values.py index 0f236c2027..eefd833b4a 100755 --- a/upgrade/roles/import_input_parameters/scripts/merge_powerscale_values.py +++ b/upgrade/roles/import_input_parameters/scripts/merge_powerscale_values.py @@ 
-22,24 +22,24 @@ import yaml -def merge_values(v21_file_path, v216_file_path, output_file_path): +def merge_values(vold_file_path, vnew_file_path, output_file_path): """ - Merge v2.1 PowerScale values into v2.16 template. + Merge old PowerScale values into new template. Args: - v21_file_path: Path to v2.1 values.yaml (source settings) - v216_file_path: Path to v2.16 values.yaml (target structure) + vold_file_path: Path to old values.yaml (source settings) + vnew_file_path: Path to new values.yaml (target structure) output_file_path: Path to write merged values.yaml """ - # Load v2.1 values (source of user settings) - with open(v21_file_path, 'r', encoding='utf-8') as file_handle: - v21_values = yaml.safe_load(file_handle) + # Load old values (source of user settings) + with open(vold_file_path, 'r', encoding='utf-8') as file_handle: + vold_values = yaml.safe_load(file_handle) - # Load v2.16 values (target structure with new defaults) - with open(v216_file_path, 'r', encoding='utf-8') as file_handle: - v216_values = yaml.safe_load(file_handle) + # Load new values (target structure with new defaults) + with open(vnew_file_path, 'r', encoding='utf-8') as file_handle: + vnew_values = yaml.safe_load(file_handle) - # Parameters to preserve from v2.1 + # Parameters to preserve from old version preserve_params = [ 'isiPath', 'isiAccessZone', @@ -49,12 +49,12 @@ def merge_values(v21_file_path, v216_file_path, output_file_path): # Preserve top-level parameters for param in preserve_params: - if param in v21_values: - v216_values[param] = v21_values[param] - print(f"Preserved {param}: {v21_values[param]}", + if param in vold_values: + vnew_values[param] = vold_values[param] + print(f"Preserved {param}: {vold_values[param]}", file=sys.stderr) - # Preserve feature flags if enabled in v2.1 + # Preserve feature flags if enabled in old version feature_flags = [ 'storageCapacity', 'podmon', @@ -64,62 +64,88 @@ def merge_values(v21_file_path, v216_file_path, output_file_path): ] for 
feature in feature_flags: - if feature in v21_values and isinstance(v21_values[feature], dict): - if 'enabled' in v21_values[feature]: - if feature not in v216_values: - v216_values[feature] = {} - v216_values[feature]['enabled'] = \ - v21_values[feature]['enabled'] + if feature in vold_values and isinstance(vold_values[feature], dict): + if 'enabled' in vold_values[feature]: + if feature not in vnew_values: + vnew_values[feature] = {} + vnew_values[feature]['enabled'] = \ + vold_values[feature]['enabled'] print(f"Preserved {feature}.enabled: " - f"{v21_values[feature]['enabled']}", + f"{vold_values[feature]['enabled']}", file=sys.stderr) + # Preserve healthMonitor with both enabled and interval + if 'healthMonitor' in vold_values and isinstance(vold_values['healthMonitor'], dict): + if 'healthMonitor' not in vnew_values: + vnew_values['healthMonitor'] = {} + for param in ['enabled', 'interval']: + if param in vold_values['healthMonitor']: + vnew_values['healthMonitor'][param] = vold_values['healthMonitor'][param] + print(f"Preserved healthMonitor.{param}: {vold_values['healthMonitor'][param]}", file=sys.stderr) + # Preserve controller settings - if 'controller' in v21_values and \ - isinstance(v21_values['controller'], dict): - if 'controller' not in v216_values: - v216_values['controller'] = {} + if 'controller' in vold_values and \ + isinstance(vold_values['controller'], dict): + if 'controller' not in vnew_values: + vnew_values['controller'] = {} controller_params = ['nodeSelector', 'tolerations', 'controllerCount'] for param in controller_params: - if param in v21_values['controller']: - v216_values['controller'][param] = \ - v21_values['controller'][param] - print(f"Preserved controller.{param}: {v21_values['controller'][param]}", file=sys.stderr) + if param in vold_values['controller']: + vnew_values['controller'][param] = \ + vold_values['controller'][param] + print(f"Preserved controller.{param}: {vold_values['controller'][param]}", file=sys.stderr) + + # 
Preserve controller-level healthMonitor + if 'healthMonitor' in vold_values['controller'] and isinstance(vold_values['controller']['healthMonitor'], dict): + if 'healthMonitor' not in vnew_values['controller']: + vnew_values['controller']['healthMonitor'] = {} + for param in ['enabled', 'interval']: + if param in vold_values['controller']['healthMonitor']: + vnew_values['controller']['healthMonitor'][param] = vold_values['controller']['healthMonitor'][param] + print(f"Preserved controller.healthMonitor.{param}: {vold_values['controller']['healthMonitor'][param]}", file=sys.stderr) # Preserve node settings - if 'node' in v21_values and isinstance(v21_values['node'], dict): - if 'node' not in v216_values: - v216_values['node'] = {} + if 'node' in vold_values and isinstance(vold_values['node'], dict): + if 'node' not in vnew_values: + vnew_values['node'] = {} node_params = ['nodeSelector', 'tolerations'] for param in node_params: - if param in v21_values['node']: - v216_values['node'][param] = v21_values['node'][param] + if param in vold_values['node']: + vnew_values['node'][param] = vold_values['node'][param] print(f"Preserved node.{param}", file=sys.stderr) + # Preserve node-level healthMonitor + if 'healthMonitor' in vold_values['node'] and isinstance(vold_values['node']['healthMonitor'], dict): + if 'healthMonitor' not in vnew_values['node']: + vnew_values['node']['healthMonitor'] = {} + if 'enabled' in vold_values['node']['healthMonitor']: + vnew_values['node']['healthMonitor']['enabled'] = vold_values['node']['healthMonitor']['enabled'] + print(f"Preserved node.healthMonitor.enabled: {vold_values['node']['healthMonitor']['enabled']}", file=sys.stderr) + # Write merged values to output file with open(output_file_path, 'w', encoding='utf-8') as file_handle: - yaml.dump(v216_values, file_handle, + yaml.dump(vnew_values, file_handle, default_flow_style=False, sort_keys=False) - print("Successfully merged v2.1 settings into v2.16 values.yaml", + print("Successfully 
merged old settings into new values.yaml", file=sys.stderr) print(f"Output written to: {output_file_path}", file=sys.stderr) if __name__ == '__main__': if len(sys.argv) != 4: - print("Usage: merge_powerscale_values.py " - " ", file=sys.stderr) + print("Usage: merge_powerscale_values.py " + " ", file=sys.stderr) sys.exit(1) - v21_input = sys.argv[1] - v216_input = sys.argv[2] + vold_input = sys.argv[1] + vnew_input = sys.argv[2] output_path = sys.argv[3] try: - merge_values(v21_input, v216_input, output_path) + merge_values(vold_input, vnew_input, output_path) except (IOError, yaml.YAMLError) as error: print(f"ERROR: Failed to merge PowerScale values.yaml: {error}", file=sys.stderr) diff --git a/upgrade/roles/import_input_parameters/scripts/transform_software_config.py b/upgrade/roles/import_input_parameters/scripts/transform_software_config.py index 9a314e7614..03ae8c47c6 100644 --- a/upgrade/roles/import_input_parameters/scripts/transform_software_config.py +++ b/upgrade/roles/import_input_parameters/scripts/transform_software_config.py @@ -23,7 +23,7 @@ # These are the target versions for software entries that should be updated TARGET_VERSIONS = { "service_k8s": "1.35.1", - "csi_driver_powerscale": "v2.16.0" + "csi_driver_powerscale": "v2.17.0" } with open(backup_file, 'r', encoding='utf-8') as f: diff --git a/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml b/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml index 488c39b72c..d725cf067a 100644 --- a/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml +++ b/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml @@ -50,6 +50,79 @@ delegate_to: "{{ kube_vip }}" connection: ssh +# ── Inject preserved LoadBalancer IPs into VMCluster manifest before apply ── +# When migrating from 2.1 StatefulSet to operator, old services are deleted +# and the operator creates new ones. 
To preserve IPs, we inject loadBalancerIP +# directly into the VMCluster CR's serviceSpec BEFORE applying, so the operator +# creates services with the correct IPs from the start (no race condition). +- name: Create LoadBalancer IP injection script + ansible.builtin.copy: + dest: /tmp/inject_vm_lb_ips.py + mode: "0755" + content: | + #!/usr/bin/env python3 + import yaml + import sys + manifest_path = sys.argv[1] + vmselect_ip = sys.argv[2] if len(sys.argv) > 2 and sys.argv[2] else "" + vminsert_ip = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else "" + with open(manifest_path) as f: + doc = yaml.safe_load(f) + spec = doc.get("spec", {}) + changed = False + if vmselect_ip and "vmselect" in spec: + svc = spec["vmselect"].setdefault("serviceSpec", {}).setdefault("spec", {}) + if svc.get("loadBalancerIP") != vmselect_ip: + svc["loadBalancerIP"] = vmselect_ip + changed = True + if vminsert_ip and "vminsert" in spec: + svc = spec["vminsert"].setdefault("serviceSpec", {}).setdefault("spec", {}) + if svc.get("loadBalancerIP") != vminsert_ip: + svc["loadBalancerIP"] = vminsert_ip + changed = True + if changed: + with open(manifest_path, "w") as f: + yaml.dump(doc, f, default_flow_style=False, sort_keys=False) + print("Injected vmselect=" + vmselect_ip + " vminsert=" + vminsert_ip) + else: + print("IPs already present - no change needed") + sys.exit(0 if changed else 2) + delegate_to: "{{ kube_vip }}" + connection: ssh + when: + - preserved_vmselect_ip | default('') | length > 0 or preserved_vminsert_ip | default('') | length > 0 + +- name: Inject preserved LoadBalancer IPs into VMCluster manifest + ansible.builtin.command: + cmd: >- + python3 /tmp/inject_vm_lb_ips.py + "{{ telemetry_deploy_dir }}/deployments/victoria-operator-vmcluster.yaml" + "{{ preserved_vmselect_ip | default('') }}" + "{{ preserved_vminsert_ip | default('') }}" + register: ip_inject_result + changed_when: ip_inject_result.rc == 0 + failed_when: ip_inject_result.rc not in [0, 2] + delegate_to: "{{ 
kube_vip }}" + connection: ssh + when: + - preserved_vmselect_ip | default('') | length > 0 or preserved_vminsert_ip | default('') | length > 0 + +- name: Clean up LoadBalancer IP injection script + ansible.builtin.file: + path: /tmp/inject_vm_lb_ips.py + state: absent + delegate_to: "{{ kube_vip }}" + connection: ssh + changed_when: false + +- name: Display LoadBalancer IP injection status + ansible.builtin.debug: + msg: >- + {{ victoria_lb_ips_preserved + if (preserved_vminsert_ip | default('') | length > 0) + or (preserved_vmselect_ip | default('') | length > 0) + else victoria_lb_ips_not_preserved }} + # ── Apply main CR (VMCluster only — 2.2 cluster mode only) ── - name: Apply VMCluster CR (cluster mode only) with retry ansible.builtin.command: diff --git a/upgrade/roles/upgrade_telemetry/tasks/backup_telemetry.yml b/upgrade/roles/upgrade_telemetry/tasks/backup_telemetry.yml index ed0ff59f83..fcc2185d4e 100644 --- a/upgrade/roles/upgrade_telemetry/tasks/backup_telemetry.yml +++ b/upgrade/roles/upgrade_telemetry/tasks/backup_telemetry.yml @@ -27,38 +27,34 @@ when: - k8s_client_mount_path is defined - k8s_client_mount_path | length > 0 + - kube_vip is defined + - kube_vip | length > 0 block: - - name: Set telemetry backup directory - ansible.builtin.set_fact: - tel_backup_dir: "{{ k8s_client_mount_path }}/upgrade/telemetry/omnia_{{ manifest.source_version | default('unknown') }}" - delegate_to: oim - connection: ssh - - name: Create telemetry backup directory ansible.builtin.file: - path: "{{ tel_backup_dir }}" + path: "{{ telemetry_backup_dir }}" state: directory mode: '0755' - delegate_to: oim + delegate_to: "{{ kube_vip }}" connection: ssh - name: Backup telemetry folder (pre-provision) ansible.builtin.copy: src: "{{ k8s_client_mount_path }}/telemetry" - dest: "{{ tel_backup_dir }}/telemetry" + dest: "{{ telemetry_backup_dir }}/telemetry" remote_src: true mode: preserve - delegate_to: oim + delegate_to: "{{ kube_vip }}" connection: ssh failed_when: false - 
name: Backup idrac_telemetry folder (pre-provision) ansible.builtin.copy: src: "{{ k8s_client_mount_path }}/idrac_telemetry" - dest: "{{ tel_backup_dir }}/idrac_telemetry" + dest: "{{ telemetry_backup_dir }}/idrac_telemetry" remote_src: true mode: preserve - delegate_to: oim + delegate_to: "{{ kube_vip }}" connection: ssh failed_when: false @@ -74,7 +70,7 @@ - name: Backup telemetry.sh from control plane ansible.builtin.copy: src: /root/telemetry.sh - dest: "{{ tel_backup_dir }}/telemetry.sh" + dest: "{{ telemetry_backup_dir }}/telemetry.sh" mode: "{{ executable_mode }}" remote_src: true delegate_to: "{{ kube_vip }}" diff --git a/upgrade/roles/upgrade_telemetry/tasks/backup_victoria.yml b/upgrade/roles/upgrade_telemetry/tasks/backup_victoria.yml index 9c6a487b78..abfdd4d107 100644 --- a/upgrade/roles/upgrade_telemetry/tasks/backup_victoria.yml +++ b/upgrade/roles/upgrade_telemetry/tasks/backup_victoria.yml @@ -21,7 +21,7 @@ path: "{{ telemetry_backup_dir }}" state: directory mode: "0755" - delegate_to: "{{ oim_host }}" + delegate_to: "{{ kube_vip }}" connection: ssh # ── Backup namespace-level resources ── diff --git a/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml b/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml index e99d7bf80c..23648e2ed6 100644 --- a/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml +++ b/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml @@ -53,6 +53,49 @@ delegate_to: "{{ kube_vip }}" connection: ssh + # ── Flush vmstorage data before shutdown ── + # Create snapshots on each vmstorage pod to force pending data/indexdb flush. + # This prevents corrupted parts.json from in-flight merges during shutdown. 
+ - name: Get old vmstorage pod names + ansible.builtin.shell: | + set -o pipefail + kubectl -n {{ telemetry_namespace }} get pods -l {{ old_vm_pod_label }} --no-headers 2>/dev/null \ + | grep -i "storage\|vmstorage" | awk '{print $1}' + register: old_vmstorage_pods + changed_when: false + failed_when: false + delegate_to: "{{ kube_vip }}" + connection: ssh + + - name: Force snapshot on each vmstorage pod (flush pending writes) + ansible.builtin.shell: | + kubectl -n {{ telemetry_namespace }} exec {{ item }} -- \ + wget -q -O- --no-check-certificate "https://localhost:8482/snapshot/create" 2>/dev/null || \ + kubectl -n {{ telemetry_namespace }} exec {{ item }} -- \ + wget -q -O- "http://localhost:8482/snapshot/create" 2>/dev/null || true + loop: "{{ old_vmstorage_pods.stdout_lines | default([]) }}" + changed_when: false + failed_when: false + when: old_vmstorage_pods.stdout_lines | default([]) | length > 0 + delegate_to: "{{ kube_vip }}" + connection: ssh + + - name: Wait for background merges to settle after writes stopped + ansible.builtin.pause: + seconds: 30 + prompt: "{{ vmstorage_merge_wait_msg }}" + + # ── Ensure sufficient graceful shutdown period ── + # Old StatefulSet may have default 30s which is too short for indexdb flush + - name: Patch old StatefulSet terminationGracePeriodSeconds to 120s + ansible.builtin.shell: | + kubectl -n {{ telemetry_namespace }} patch statefulset {{ actual_old_statefulset }} \ + -p '{"spec":{"template":{"spec":{"terminationGracePeriodSeconds":120}}}}' + changed_when: true + failed_when: false + delegate_to: "{{ kube_vip }}" + connection: ssh + # ── Graceful shutdown of old StatefulSet ── - name: Scale down old StatefulSet ansible.builtin.command: @@ -84,6 +127,11 @@ delegate_to: "{{ kube_vip }}" connection: ssh + - name: Wait for storage cache flush after pod termination + ansible.builtin.pause: + seconds: 15 + prompt: "{{ storage_cache_flush_msg }}" + # ── PVC relabeling (data preservation via PV rebind) ── - name: Get all 
old PVCs from StatefulSet (using specific StatefulSet label) ansible.builtin.command: @@ -169,6 +217,38 @@ delegate_to: "{{ kube_vip }}" connection: ssh + # ── Capture LoadBalancer IPs before deletion ── + # Preserve existing IPs to prevent MetalLB from assigning new ones + # Only applicable for statefulset_to_operator migration path + - name: Get vminsert LoadBalancer IP + ansible.builtin.shell: | + kubectl -n {{ telemetry_namespace }} get svc vminsert -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "" + register: old_vminsert_ip + changed_when: false + failed_when: false + delegate_to: "{{ kube_vip }}" + connection: ssh + + - name: Get vmselect LoadBalancer IP + ansible.builtin.shell: | + kubectl -n {{ telemetry_namespace }} get svc vmselect -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "" + register: old_vmselect_ip + changed_when: false + failed_when: false + delegate_to: "{{ kube_vip }}" + connection: ssh + + - name: Set LoadBalancer IP facts for preservation + ansible.builtin.set_fact: + preserved_vminsert_ip: "{{ old_vminsert_ip.stdout | trim }}" + preserved_vmselect_ip: "{{ old_vmselect_ip.stdout | trim }}" + + - name: Display preserved LoadBalancer IPs + ansible.builtin.debug: + msg: + - "Preserving vminsert IP: {{ preserved_vminsert_ip if preserved_vminsert_ip else 'None' }}" + - "Preserving vmselect IP: {{ preserved_vmselect_ip if preserved_vmselect_ip else 'None' }}" + # ── Cleanup old pre-operator services and deployments ── # The operator creates new services with different names (e.g. vminsert-victoria-cluster), # so the old standalone services become stale and waste LoadBalancer IPs. 
diff --git a/upgrade/roles/upgrade_telemetry/vars/main.yml b/upgrade/roles/upgrade_telemetry/vars/main.yml index 8326120799..a869e5a52a 100644 --- a/upgrade/roles/upgrade_telemetry/vars/main.yml +++ b/upgrade/roles/upgrade_telemetry/vars/main.yml @@ -26,8 +26,7 @@ oim_host: oim executable_mode: "0755" # Upgrade directory paths (on k8s NFS share, resolved at runtime) -telemetry_upgrade_dir: "{{ k8s_client_mount_path }}/upgrade/telemetry" -telemetry_backup_dir: "{{ telemetry_upgrade_dir }}/omnia_{{ manifest.source_version | default('unknown') }}" +telemetry_backup_dir: "{{ k8s_client_mount_path }}/upgrade/backup/telemetry/omnia_{{ manifest.source_version | default('unknown') }}" # PV backup location (cluster-wide backup pre-provision) telemetry_pv_backup_file: "{{ telemetry_backup_dir }}/all_pvs.yaml" @@ -101,7 +100,12 @@ victoria_unhealthy_pods_warning: >- proceeds. The upgrade will re-create them with the new version. victoria_pods_deleted: "Deleted {{ victoria_unhealthy_pods | length }} unhealthy pod(s). Upgrade will re-create them." victoria_backup_completed: "Victoria backup completed: {{ telemetry_backup_dir }}" -victoria_crs_applied: "VictoriaMetrics CRs applied (mode: {{ victoria_deploy_mode }}" +victoria_crs_applied: "VictoriaMetrics CRs applied (mode: {{ victoria_deploy_mode }})" +victoria_lb_ips_preserved: >- + LoadBalancer IPs injected into VMCluster manifest - + vminsert: {{ preserved_vminsert_ip | default('N/A') }}, + vmselect: {{ preserved_vmselect_ip | default('N/A') }} +victoria_lb_ips_not_preserved: "No old LoadBalancer IPs found to preserve (fresh deploy or already operator-managed)" victoria_pods_not_ready: "Telemetry upgrade FAILED: Some pods are not ready. {{ pods_not_ready.stdout | int }} pod(s) not in Running state." victoria_pods_ready_after_wait: "All telemetry pods are ready after waiting" telemetry_upgrade_success: "Telemetry upgrade COMPLETED: All telemetry pods are running and ready." @@ -138,6 +142,12 @@ mysql_crash_error_msg: | 1. 
Check pod logs: kubectl logs -n telemetry -c mysqldb 2. Check PVC status: kubectl get pvc -n telemetry | grep idrac 3. Contact support if issue persists. + +# ============================================================================ +# PAUSE MESSAGES +# ============================================================================ +vmstorage_merge_wait_msg: "Waiting 30s for vmstorage background merges to settle..." +storage_cache_flush_msg: "Waiting 15s for storage cache flush..." idrac_patch_msg: >- idrac-telemetry patched: terminationGracePeriodSeconds=120s. MySQL will have enough time to flush on NFS during pod restart.