diff --git a/build_stream/orchestrator/common/result_poller.py b/build_stream/orchestrator/common/result_poller.py
index 6d35738773..6f40a91965 100644
--- a/build_stream/orchestrator/common/result_poller.py
+++ b/build_stream/orchestrator/common/result_poller.py
@@ -362,6 +362,11 @@ def _on_result_received(self, result: PlaybookResult) -> None:
                 # S12: On restart failure, still persist node_results.json
                 if result.stage_name == "restart":
                     self._on_restart_completed(result)
+                    self._on_restart_failure(result)
+
+                # On deploy failure, mark ImageGroup FAILED
+                if result.stage_name == "deploy":
+                    self._on_deploy_failure(result)
 
                 # On validate failure, mark ImageGroup FAILED
                 if result.stage_name == "validate":
@@ -968,3 +973,49 @@ def _on_deploy_failure(self, result: PlaybookResult) -> None:
                 job_id=str(result.job_id),
                 exc_info=True,
             )
+
+    def _on_restart_failure(self, result: PlaybookResult) -> None:
+        """Mark the job's ImageGroup FAILED after a restart failure; errors are logged, not re-raised."""
+        if self._image_group_repo is None:  # no repo configured: log a warning and skip
+            log_secure_info(
+                "warning",
+                f"ImageGroup repo not available; skipping restart failure "
+                f"update for job={result.job_id}",
+                job_id=str(result.job_id),
+            )
+            return
+
+        try:
+            image_group = self._image_group_repo.find_by_job_id(
+                JobId(str(result.job_id))
+            )
+            if image_group is None:  # nothing to update for this job
+                log_secure_info(
+                    "error",
+                    f"Restart failure callback: No ImageGroup found for job={result.job_id}.",
+                    job_id=str(result.job_id),
+                )
+                return
+
+            self._image_group_repo.update_status(
+                image_group_id=image_group.id,
+                new_status=ImageGroupStatus.FAILED,
+            )
+
+            if hasattr(self._image_group_repo, 'session'):  # commit only when the repo exposes a session
+                self._image_group_repo.session.commit()
+
+            log_secure_info(
+                "warning",
+                f"Restart FAILED for job={result.job_id}. "
+                f"ImageGroup '{image_group.id}' -> FAILED.",
+                job_id=str(result.job_id),
+            )
+        except Exception as exc:  # pylint: disable=broad-except
+            log_secure_info(
+                "error",
+                "Failed to update ImageGroup status on restart "
+                f"failure for job={result.job_id}: {exc}",
+                job_id=str(result.job_id),
+                exc_info=True,
+            )
diff --git a/common/library/module_utils/local_repo/software_utils.py b/common/library/module_utils/local_repo/software_utils.py
index da20edea12..af3c1ffab9 100644
--- a/common/library/module_utils/local_repo/software_utils.py
+++ b/common/library/module_utils/local_repo/software_utils.py
@@ -38,6 +38,7 @@
     CSV_COLUMNS,
     SOFTWARE_CONFIG_SUBDIR,
     DEFAULT_STATUS_FILENAME,
+    STATUS_CSV_HEADER,
     RPM_LABEL_TEMPLATE,
     RHEL_OS_URL,
     SOFTWARES_KEY,
@@ -853,6 +854,16 @@ def check_csv_existence(path):
 
 def read_status_csv(csv_path):
     """Reads the status.csv file and returns a list of row dictionaries."""
+    # Header repair: runs only for an existing, non-empty file; the read below still raises if the file is missing
+    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
+        with open(csv_path, 'r', encoding='utf-8') as file:
+            lines = file.readlines()
+            if lines and lines[0].strip() != STATUS_CSV_HEADER.strip():
+                # First line is not the expected header: rewrite with the header first, keeping every old line as data
+                with open(csv_path, 'w', encoding='utf-8') as wfile:
+                    wfile.write(STATUS_CSV_HEADER)  # NOTE(review): assumes the constant ends with a newline - confirm
+                    wfile.writelines(lines)
+
     with open(csv_path, mode='r', newline='', encoding='utf-8') as file:
         reader = csv.DictReader(file)
         return [row for row in reader]
diff --git a/common/library/modules/parallel_tasks.py b/common/library/modules/parallel_tasks.py
index 20268b10fa..99cc28652a 100644
--- a/common/library/modules/parallel_tasks.py
+++ b/common/library/modules/parallel_tasks.py
@@ -160,9 +160,19 @@ def determine_function(
 
         # Construct the status file path using DEFAULT_STATUS_FILENAME.
         status_file = os.path.join(csv_file_path, DEFAULT_STATUS_FILENAME)
+
+        # Ensure file exists with valid header
         if not os.path.exists(status_file) or os.stat(status_file).st_size == 0:
             with open(status_file, 'w', encoding="utf-8") as file:
                 file.write(STATUS_CSV_HEADER)
+        else:
+            with open(status_file, 'r', encoding="utf-8") as file:
+                lines = file.readlines()
+                if lines and lines[0].strip() != STATUS_CSV_HEADER.strip():
+                    # First line is not the expected header - prepend the header and keep all existing lines as data
+                    with open(status_file, 'w', encoding="utf-8") as wfile:
+                        wfile.write(STATUS_CSV_HEADER)
+                        wfile.writelines(lines)
 
 
         task_type = task.get("type")
diff --git a/examples/powerscale_reference_files/secret.yaml b/examples/powerscale_reference_files/CSI_driver/secret.yaml
similarity index 100%
rename from examples/powerscale_reference_files/secret.yaml
rename to examples/powerscale_reference_files/CSI_driver/secret.yaml
diff --git a/examples/powerscale_reference_files/values.yaml b/examples/powerscale_reference_files/CSI_driver/values.yaml
similarity index 97%
rename from examples/powerscale_reference_files/values.yaml
rename to examples/powerscale_reference_files/CSI_driver/values.yaml
index 2b612e02ea..14826ff22e 100644
--- a/examples/powerscale_reference_files/values.yaml
+++ b/examples/powerscale_reference_files/CSI_driver/values.yaml
@@ -2,35 +2,35 @@
 ########################
 # version: version of this values file
 # Note: Do not change this value
-version: "v2.16.0"
+version: "v2.17.0"
 
 images:
   # "driver" defines the container image, used for the driver container.
   driver:
-    image: quay.io/dell/container-storage-modules/csi-isilon:v2.16.0
+    image: quay.io/dell/container-storage-modules/csi-isilon:v2.17.0
   # CSI sidecars
   attacher:
-    image: registry.k8s.io/sig-storage/csi-attacher:v4.10.0
+    image: registry.k8s.io/sig-storage/csi-attacher:v4.11.0
   provisioner:
-    image: registry.k8s.io/sig-storage/csi-provisioner:v6.1.0
+    image: registry.k8s.io/sig-storage/csi-provisioner:v6.2.0
   snapshotter:
-    image: registry.k8s.io/sig-storage/csi-snapshotter:v8.4.0
+    image: registry.k8s.io/sig-storage/csi-snapshotter:v8.5.0
   resizer:
-    image: registry.k8s.io/sig-storage/csi-resizer:v2.0.0
+    image: registry.k8s.io/sig-storage/csi-resizer:v2.1.0
   registrar:
-    image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.15.0
+    image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.16.0
   healthmonitor:
-    image: registry.k8s.io/sig-storage/csi-external-health-monitor-controller:v0.16.0
+    image: registry.k8s.io/sig-storage/csi-external-health-monitor-controller:v0.17.0
 
   # CSM sidecars
   replication:
-    image: quay.io/dell/container-storage-modules/dell-csi-replicator:v1.14.0
+    image: quay.io/dell/container-storage-modules/dell-csi-replicator:v1.15.0
   podmon:
-    image: quay.io/dell/container-storage-modules/podmon:v1.15.0
+    image: quay.io/dell/container-storage-modules/podmon:v1.16.0
   authorization:
-    image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.4.0
+    image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
   metadataretriever:
-    image: quay.io/dell/container-storage-modules/csi-metadata-retriever:v1.13.0
+    image: quay.io/dell/container-storage-modules/csi-metadata-retriever:v1.14.0
 
 # CSI driver log level
 # Allowed values: "error", "warn"/"warning", "info", "debug"
@@ -119,7 +119,7 @@ controller:
   # the Kubernetes release.
   # Allowed values: n, where n > 0
   # Default value: None
-  controllerCount: 2
+  controllerCount: 1
 
   # volumeNamePrefix: Prefix of PersistentVolume names created
   # Allowed values: string
@@ -184,7 +184,7 @@ controller:
     #   true: enable volume expansion feature(install resizer sidecar)
     #   false: disable volume snapshot feature(do not install resizer sidecar)
     # Default value: None
-    enabled: true
+    enabled: false
 
   healthMonitor:
     # enabled: Enable/Disable health monitor of CSI volumes- volume status, volume condition
@@ -192,7 +192,7 @@ controller:
     #   true: enable checking of health condition of CSI volumes
     #   false: disable checking of health condition of CSI volumes
     # Default value: None
-    enabled: false
+    enabled: true
 
     # interval: Interval of monitoring volume health condition
     # Allowed values: Number followed by unit of time (s,m,h)
@@ -301,7 +301,7 @@ node:
     #   true: enable checking of health condition of CSI volumes
     #   false: disable checking of health condition of CSI volumes
     # Default value: None
-    enabled: false
+    enabled: true
 
 ## PLATFORM ATTRIBUTES
 ######################
diff --git a/examples/powerscale_reference_files/powerscale_metrics/values.yaml b/examples/powerscale_reference_files/powerscale_metrics/values.yaml
new file mode 100644
index 0000000000..a89148cd79
--- /dev/null
+++ b/examples/powerscale_reference_files/powerscale_metrics/values.yaml
@@ -0,0 +1,221 @@
+karaviMetricsPowerflex:
+  image: quay.io/dell/container-storage-modules/csm-metrics-powerflex:v1.15.0
+  enabled: false
+  collectorAddr: otel-collector:55680
+  # comma separated list of provisioner names (ex: csi-vxflexos.dellemc.com)
+  provisionerNames: csi-vxflexos.dellemc.com
+  # set sdcMetricsEnabled to "false" to disable collection of SDC metrics
+  sdcMetricsEnabled: "true"
+  # set polling frequency to the PowerFlex array to get metrics data
+  sdcPollFrequencySeconds: 10
+  volumePollFrequencySeconds: 10
+  # set volumeMetricsEnabled to "false" to disable collection of Volume metrics
+  volumeMetricsEnabled: "true"
+  # set storageClassPoolMetricsEnabled to "false" to disable collection of storage class/pool metrics
+  storageClassPoolMetricsEnabled: "true"
+  # set the polling frequency to configure the interval at which storage class/pool metrics are gathered
+  storageClassPoolPollFrequencySeconds: 10
+  # set topologyMetricsEnabled to "false" to disable collection of topology metrics
+  topologyMetricsEnabled: "true"
+  # set polling frequency to get topology metrics
+  topologyMetricsPollFrequencySeconds: 30
+  # set the default max concurrent queries to PowerFlex
+  concurrentPowerflexQueries: 10
+  # set the default endpoint for PowerFlex service
+  endpoint: karavi-metrics-powerflex
+  service:
+    type: ClusterIP
+  logLevel: INFO
+  logFormat: text
+  authorization:
+    enabled: false
+    # sidecarProxy.image: the container image used for the csm-authorization-sidecar.
+    # Default value: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    sidecarProxy:
+      image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    # proxyHost: hostname of the csm-authorization server
+    # Default value: None
+    proxyHost:
+    # skipCertificateValidation: certificate validation of the csm-authorization server
+    # Allowed Values:
+    #   "true" - TLS certificate verification will be skipped
+    #   "false" - TLS certificate will be verified
+    # Default value: "true"
+    skipCertificateValidation: true
+
+karaviMetricsPowerstore:
+  image: quay.io/dell/container-storage-modules/csm-metrics-powerstore:v1.15.0
+  enabled: false
+  collectorAddr: otel-collector:55680
+  # comma separated list of provisioner names (ex: csi-powerstore.dellemc.com)
+  provisionerNames: csi-powerstore.dellemc.com
+  # set polling frequency to the PowerStore array to get metrics data
+  volumePollFrequencySeconds: 20
+  spacePollFrequencySeconds: 300
+  arrayPollFrequencySeconds: 300
+  filesystemPollFrequencySeconds: 20
+  # apiTimeout: Defines the timeout for PowerStore API calls in seconds
+  # Allowed values: Number followed by unit (s,m,h)
+  # Examples: 60s, 5m, 1h
+  # Default value: 120s
+  apiTimeout: "120s"
+  # set volumeMetricsEnabled to "false" to disable collection of Volume metrics
+  volumeMetricsEnabled: "true"
+  # set the default max concurrent queries to PowerStore
+  concurrentPowerstoreQueries: 10
+  # set topologyMetricsEnabled to "false" to disable collection of topology metrics
+  topologyMetricsEnabled: "true"
+  # set polling frequency to get topology metrics
+  topologyMetricsPollFrequencySeconds: 30
+  # set the default endpoint for PowerStore service
+  endpoint: karavi-metrics-powerstore
+  service:
+    type: ClusterIP
+  logLevel: INFO
+  logFormat: text
+  zipkin:
+    uri: ""
+    serviceName: metrics-powerstore
+    probability: 0.0
+  authorization:
+    enabled: false
+    # sidecarProxy.image: the container image used for the csm-authorization-sidecar.
+    # Default value: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    sidecarProxy:
+      image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    # proxyHost: hostname of the csm-authorization server
+    # Default value: None
+    proxyHost:
+    # skipCertificateValidation: certificate validation of the csm-authorization server
+    # Allowed Values:
+    #   "true" - TLS certificate verification will be skipped
+    #   "false" - TLS certificate will be verified
+    # Default value: "true"
+    skipCertificateValidation: true
+
+karaviMetricsPowerscale:
+  image: quay.io/dell/container-storage-modules/csm-metrics-powerscale:v1.12.0
+  enabled: true
+  collectorAddr: otel-collector:55680
+  # comma separated list of provisioner names (ex: csi-isilon.dellemc.com)
+  provisionerNames: csi-isilon.dellemc.com
+  # set capacityMetricsEnabled to "false" to disable collection of capacity metrics
+  capacityMetricsEnabled: "true"
+  # set performanceMetricsEnabled to "false" to disable collection of performance metrics
+  performanceMetricsEnabled: "true"
+  # set topologyMetricsEnabled to "false" to disable collection of topology metrics
+  topologyMetricsEnabled: "true"
+  # set polling frequency to get cluster capacity metrics data
+  clusterCapacityPollFrequencySeconds: 30
+  # set polling frequency to get cluster performance data
+  clusterPerformancePollFrequencySeconds: 20
+  # set polling frequency to get quota capacity metrics data
+  quotaCapacityPollFrequencySeconds: 30
+  # set polling frequency to get topology metrics
+  topologyMetricsPollFrequencySeconds: 30
+  # set the default max concurrent queries to PowerScale
+  concurrentPowerscaleQueries: 10
+  # set the default endpoint for PowerScale service
+  endpoint: karavi-metrics-powerscale
+  service:
+    type: ClusterIP
+  logLevel: INFO
+  logFormat: text
+  # isiClientOptions to access Powerscale OneFS API server
+  isiClientOptions:
+    # set isiSkipCertificateValidation to true/false to skip/verify OneFS API server's certificates
+    # default isiSkipCertificateValidation: true to skip OneFS API server's certificates
+    isiSkipCertificateValidation: true
+    # set isiAuthType to 0/1 to enable session-based/basic Authentication
+    # default isiAuthType: 0 to use session-based Authentication
+    isiAuthType: 1
+    # set isiLogVerbose to 0/1/2 to decide whether High/Medium/Low content of the OneFS REST API messages is logged in debug-level logs
+    # default isiLogVerbose: 0 to log full content of the HTTP request and response
+    isiLogVerbose: 0
+  authorization:
+    enabled: false
+    # sidecarProxy.image: the container image used for the csm-authorization-sidecar.
+    # Default value: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    sidecarProxy:
+      image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    # proxyHost: hostname of the csm-authorization server
+    # Default value: None
+    proxyHost:
+    # skipCertificateValidation: certificate validation of the csm-authorization server
+    # Allowed Values:
+    #   "true" - TLS certificate verification will be skipped
+    #   "false" - TLS certificate will be verified
+    # Default value: "true"
+    skipCertificateValidation: true
+
+karaviMetricsPowermax:
+  image: quay.io/dell/container-storage-modules/csm-metrics-powermax:v1.10.0
+  enabled: false
+  collectorAddr: otel-collector:55680
+  # comma separated list of provisioner names (ex: csi-powermax.dellemc.com)
+  provisionerNames: csi-powermax.dellemc.com
+  # set capacityMetricsEnabled to "false" to disable collection of capacity metrics
+  capacityMetricsEnabled: "true"
+  # set performanceMetricsEnabled to "false" to disable collection of performance metrics
+  performanceMetricsEnabled: "true"
+  # set polling frequency to get capacity metrics data for volume, storagegroup, srp and array
+  capacityPollFrequencySeconds: 3600
+  # set polling frequency to get performance metrics data for volume, storagegroup
+  performancePollFrequencySeconds: 300
+  # set the default max concurrent queries to PowerMax
+  concurrentPowermaxQueries: 10
+  # set topologyMetricsEnabled to "false" to disable collection of topology metrics
+  topologyMetricsEnabled: "true"
+  # set polling frequency to get topology metrics
+  topologyMetricsPollFrequencySeconds: 300
+  # set the default endpoint for PowerMax service
+  endpoint: karavi-metrics-powermax
+  # useSecret
+  # Defines if a Secret should be used to provide Unisphere for PowerMax endpoints
+  # and login credentials instead of the deprecated powermax-reverseproxy-config ConfigMap.
+  # If set to true, the contents of the secret specified by defaultCredentialsSecret
+  # will be used, in the new format, to specify Unisphere for PowerMax endpoints, array IDs,
+  # and login credentials. If set to false, the deprecated ConfigMap will be automatically
+  # created and used.
+  # Default value: false
+  useSecret: false
+  # defaultCredentialsSecret
+  # The name of the Kubernetes Secret containing the details of the PowerMax arrays,
+  # their Unisphere endpoints and their login credentials if useSecret is set to true.
+  # Default value: ""
+  defaultCredentialsSecret: ""
+  service:
+    type: ClusterIP
+  logLevel: INFO
+  logFormat: text
+  authorization:
+    enabled: false
+    # sidecarProxy.image: the container image used for the csm-authorization-sidecar.
+    # Default value: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    sidecarProxy:
+      image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
+    # proxyHost: hostname of the csm-authorization server
+    # Default value: None
+    proxyHost:
+    # skipCertificateValidation: certificate validation of the csm-authorization server
+    # Allowed Values:
+    #   "true" - TLS certificate verification will be skipped
+    #   "false" - TLS certificate will be verified
+    # Default value: "true"
+    skipCertificateValidation: true
+
+otelCollector:
+  image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector:0.150.1
+  service:
+    type: ClusterIP
+  nginxProxy:
+    image: nginxinc/nginx-unprivileged:1.29
+# Karavi-observability requires cert-manager. If cert-manager is already present in the cluster, set enabled to false to skip installing it.
+cert-manager:
+  enabled: true
+  startupapicheck:
+    enabled: false
+    serviceAccount:
+      create: false
+# Optionally, uncomment and specify the name of the pre-created namespace to install the module in it
+# namespace:
\ No newline at end of file
diff --git a/provision/roles/configure_ochami/tasks/validate_additional_cloud_init.yml b/provision/roles/configure_ochami/tasks/validate_additional_cloud_init.yml
index 65747e39a4..50b9545c53 100644
--- a/provision/roles/configure_ochami/tasks/validate_additional_cloud_init.yml
+++ b/provision/roles/configure_ochami/tasks/validate_additional_cloud_init.yml
@@ -33,6 +33,12 @@
     additional_cloud_init_fg_names: []
   when: additional_cloud_init_file_path == ''
 
+- name: Create cloud-init directory
+  ansible.builtin.file:
+    path: "{{ cloud_init_dir }}"
+    state: directory
+    mode: "{{ hostvars['localhost']['dir_permissions_755'] }}"
+
 - name: Load additional cloud-init config
   when: additional_cloud_init_file_path != ''
   block:
diff --git a/provision/roles/telemetry/tasks/apply_telemetry_on_upgrade.yml b/provision/roles/telemetry/tasks/apply_telemetry_on_upgrade.yml
deleted file mode 100644
index 0cdb4bd2cb..0000000000
--- a/provision/roles/telemetry/tasks/apply_telemetry_on_upgrade.yml
+++ /dev/null
@@ -1,240 +0,0 @@
-# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
----
-
-- name: Apply telemetry configurations for upgrade
-  when:
-    - kube_vip is defined
-    - kube_vip | length > 0
-    - idrac_telemetry_support | default(false) | bool
-  block:
-    - name: Check if telemetry deployment file exists
-      ansible.builtin.stat:
-        path: "{{ idrac_telemetry_statefulset_path }}"
-      register: telemetry_stat
-
-    - name: Get current iDRAC telemetry StatefulSet configuration
-      kubernetes.core.k8s_info:
-        api_version: apps/v1
-        kind: StatefulSet
-        name: idrac-telemetry
-        namespace: "{{ telemetry_namespace }}"
-      register: current_idrac_statefulset
-      failed_when: false
-      when:
-        - telemetry_stat.stat.exists | default(false)
-
-    - name: Set replica count as fact
-      ansible.builtin.set_fact:
-        preserved_replica_count: "{{ current_idrac_statefulset.resources[0].spec.replicas | default(1) }}"
-      when:
-        - current_idrac_statefulset.resources is defined and current_idrac_statefulset.resources | length > 0
-
-    - name: Show current replica count
-      ansible.builtin.debug:
-        msg: "Current replica count: {{ preserved_replica_count }}"
-        verbosity: 2
-      when:
-        - preserved_replica_count is defined
-
-    - name: Read iDRAC telemetry StatefulSet YAML file
-      ansible.builtin.slurp:
-        src: "{{ idrac_telemetry_statefulset_path }}"
-      register: idrac_statefulset_yaml
-
-    - name: Update StatefulSet definition with preserved replica count
-      ansible.builtin.set_fact:
-        updated_statefulset_definition: "{{ idrac_statefulset_yaml.content | b64decode | regex_replace('---\\n', '') | from_yaml | combine({'spec': {'replicas': preserved_replica_count | int}}, recursive=true) }}"  # noqa: yaml[line-length]
-      when:
-        - telemetry_stat.stat.exists | default(false)
-        - preserved_replica_count is defined
-
-    - name: Apply iDRAC telemetry StatefulSet with preserved replica count
-      kubernetes.core.k8s:
-        state: present
-        definition: "{{ updated_statefulset_definition }}"
-      register: kubectl_apply_result
-      when:
-        - updated_statefulset_definition is defined
-        - telemetry_stat.stat.exists | default(false)
-
-    - name: Display kubectl apply result
-      ansible.builtin.debug:
-        msg: "{{ kubectl_apply_result }}"
-      when:
-        - kubectl_apply_result is defined
-
-    - name: Wait for idrac telemetry receiver to be ready
-      kubernetes.core.k8s_info:
-        api_version: v1
-        kind: Pod
-        namespace: "{{ telemetry_namespace }}"
-        label_selectors:
-          - "app=idrac-telemetry-receiver"
-        wait: true
-        wait_condition:
-          type: Ready
-          status: "True"
-        wait_timeout: 120
-      delegate_to: "{{ kube_vip }}"
-      register: idrac_telemetry_receiver_ready
-      failed_when: false
-      when:
-        - idrac_telemetry_support | default(false) | bool
-
-    - name: Display idrac telemetry receiver ready status
-      ansible.builtin.debug:
-        msg: "{{ idrac_telemetry_receiver_ready }}"
-      when:
-        - idrac_telemetry_support | default(false) | bool
-        - idrac_telemetry_receiver_ready is defined
-
-- name: Apply LDMS configurations for upgrade
-  when:
-    - kube_vip is defined
-    - kube_vip | length > 0
-    - ldms_support | default(false) | bool
-  block:
-    - name: Check if LDMS aggregator is running on service k8s cluster
-      kubernetes.core.k8s_info:
-        api_version: apps/v1
-        kind: StatefulSet
-        name: nersc-ldms-aggr
-        namespace: "{{ telemetry_namespace }}"
-      delegate_to: "{{ kube_vip }}"
-      register: ldms_statefulset_info
-      failed_when: false
-
-    - name: Set LDMS running state
-      ansible.builtin.set_fact:
-        ldms_running: "{{ ldms_statefulset_info.resources is defined and ldms_statefulset_info.resources | length > 0 }}"
-
-    - name: Check if LDMS store daemon is running on service k8s cluster
-      kubernetes.core.k8s_info:
-        api_version: v1
-        kind: Pod
-        namespace: "{{ telemetry_namespace }}"
-        label_selectors:
-          - "app=nersc-ldms-store"
-      delegate_to: "{{ kube_vip }}"
-      register: ldms_store_pod_info
-      failed_when: false
-      when:
-        - ldms_running | default(false) | bool
-
-    - name: Set LDMS store daemon running state
-      ansible.builtin.set_fact:
-        ldms_store_running: "{{ ldms_store_pod_info.resources is defined and ldms_store_pod_info.resources | length > 0 }}"
-      when:
-        - ldms_running | default(false) | bool
-
-    - name: Restart LDMS store daemon pod
-      kubernetes.core.k8s:
-        state: absent
-        api_version: v1
-        kind: Pod
-        name: "{{ ldms_store_pod_info.resources[0].metadata.name }}"
-        namespace: "{{ telemetry_namespace }}"
-      delegate_to: "{{ kube_vip }}"
-      failed_when: false
-      when:
-        - ldms_store_running | default(false) | bool
-
-    - name: Wait for LDMS store daemon pod to be ready after restart
-      kubernetes.core.k8s_info:
-        api_version: v1
-        kind: Pod
-        namespace: "{{ telemetry_namespace }}"
-        label_selectors:
-          - "app=nersc-ldms-store"
-        wait: true
-        wait_condition:
-          type: Ready
-          status: "True"
-        wait_timeout: 120
-      delegate_to: "{{ kube_vip }}"
-      register: ldms_store_pod_ready
-      failed_when: false
-      when:
-        - ldms_store_running | default(false) | bool
-
-    - name: Display LDMS store daemon restart status
-      ansible.builtin.debug:
-        msg: >
-          {{ ldms_store_pod_ready_msg
-          if (ldms_store_pod_ready.resources | default([]) | length > 0)
-          else ldms_store_pod_not_ready_msg }}
-      when:
-        - ldms_store_running | default(false) | bool
-
-    - name: Check if decomp.json exists
-      ansible.builtin.stat:
-        path: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/ldms/nersc-ldms-aggr/scripts/decomp.json"
-      register: decomp_json_stat
-
-    - name: Copy decompose.json if it doesn't exist
-      ansible.builtin.copy:
-        src: files/scripts/decomp.json
-        dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/ldms/nersc-ldms-aggr/scripts/decomp.json"
-        mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
-      when: not decomp_json_stat.stat.exists
-
-    - name: Restart LDMS aggregator StatefulSet
-      kubernetes.core.k8s:
-        state: present
-        definition:
-          apiVersion: apps/v1
-          kind: StatefulSet
-          metadata:
-            name: nersc-ldms-aggr
-            namespace: "{{ telemetry_namespace }}"
-          spec:
-            template:
-              metadata:
-                annotations:
-                  kubectl.kubernetes.io/restartedAt: "{{ ansible_date_time.iso8601 }}"
-      delegate_to: "{{ kube_vip }}"
-      failed_when: false
-      when:
-        - ldms_running | default(false) | bool
-        - ldms_conf_file.stat.exists | default(false)
-        - ldms_bin_file.stat.exists | default(false)
-
-    - name: Wait for LDMS aggregator pod to be ready after restart
-      kubernetes.core.k8s_info:
-        api_version: v1
-        kind: Pod
-        namespace: "{{ telemetry_namespace }}"
-        label_selectors:
-          - "app=nersc-ldms-aggr"
-        wait: true
-        wait_condition:
-          type: Ready
-          status: "True"
-        wait_timeout: 120
-      delegate_to: "{{ kube_vip }}"
-      register: ldms_pod_ready
-      failed_when: false
-      when:
-        - ldms_running | default(false) | bool
-        - ldms_conf_file.stat.exists | default(false)
-        - ldms_bin_file.stat.exists | default(false)
-
-    - name: Display LDMS aggregator restart status
-      ansible.builtin.debug:
-        msg: "{{ ldms_pod_ready_msg if (ldms_pod_ready.resources | default([]) | length > 0) else ldms_pod_not_ready_msg }}"
-      when:
-        - ldms_running | default(false) | bool
-        - ldms_conf_file.stat.exists | default(false)
-        - ldms_bin_file.stat.exists | default(false)
diff --git a/provision/roles/telemetry/tasks/derive_sink_support_flags.yml b/provision/roles/telemetry/tasks/derive_sink_support_flags.yml
index 3e59602e44..7f2767d20a 100644
--- a/provision/roles/telemetry/tasks/derive_sink_support_flags.yml
+++ b/provision/roles/telemetry/tasks/derive_sink_support_flags.yml
@@ -68,34 +68,52 @@
       additional_remote_write_endpoints: "{{ telemetry_config.powerscale_configurations.additional_remote_write_endpoints | default([]) }}"
   when: telemetry_config.powerscale_configurations is defined
 
-- name: Check if any source targets victoria_metrics
+- name: Check if any enabled source targets victoria_metrics
   ansible.builtin.set_fact:
     victoria_metrics_support: true
     cacheable: true
   when: >-
-    'victoria_metrics' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([])) or
-    'victoria_metrics' in (telemetry_config.telemetry_sources.powerscale.collection_targets | default([])) or
-    'victoria_metrics' in (telemetry_config.telemetry_sources.ufm.collection_targets | default([])) or
-    'victoria_metrics' in (telemetry_config.telemetry_sources.vast.collection_targets | default([]))
+    ((telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool) and
+     'victoria_metrics' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([]))) or
+    (((telemetry_config.telemetry_sources.powerscale.metrics_enabled | default(false) | bool) or
+      (telemetry_config.telemetry_sources.powerscale.logs_enabled | default(false) | bool)) and
+     'victoria_metrics' in (telemetry_config.telemetry_sources.powerscale.collection_targets | default([]))) or
+    (((telemetry_config.telemetry_sources.ufm.metrics_enabled | default(false) | bool) or
+      (telemetry_config.telemetry_sources.ufm.logs_enabled | default(false) | bool)) and
+     'victoria_metrics' in (telemetry_config.telemetry_sources.ufm.collection_targets | default([]))) or
+    (((telemetry_config.telemetry_sources.vast.metrics_enabled | default(false) | bool) or
+      (telemetry_config.telemetry_sources.vast.logs_enabled | default(false) | bool)) and
+     'victoria_metrics' in (telemetry_config.telemetry_sources.vast.collection_targets | default([])))
 
-- name: Check if any source targets victoria_logs
+- name: Check if any enabled source targets victoria_logs
   ansible.builtin.set_fact:
     victoria_logs_support: true
     cacheable: true
   when: >-
-    'victoria_logs' in (telemetry_config.telemetry_sources.powerscale.collection_targets | default([])) or
-    'victoria_logs' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([])) or
-    'victoria_logs' in (telemetry_config.telemetry_sources.ufm.collection_targets | default([])) or
-    'victoria_logs' in (telemetry_config.telemetry_sources.vast.collection_targets | default([]))
+    (((telemetry_config.telemetry_sources.powerscale.metrics_enabled | default(false) | bool) or
+      (telemetry_config.telemetry_sources.powerscale.logs_enabled | default(false) | bool)) and
+     'victoria_logs' in (telemetry_config.telemetry_sources.powerscale.collection_targets | default([]))) or
+    ((telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool) and
+     'victoria_logs' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([]))) or
+    (((telemetry_config.telemetry_sources.ufm.metrics_enabled | default(false) | bool) or
+      (telemetry_config.telemetry_sources.ufm.logs_enabled | default(false) | bool)) and
+     'victoria_logs' in (telemetry_config.telemetry_sources.ufm.collection_targets | default([]))) or
+    (((telemetry_config.telemetry_sources.vast.metrics_enabled | default(false) | bool) or
+      (telemetry_config.telemetry_sources.vast.logs_enabled | default(false) | bool)) and
+     'victoria_logs' in (telemetry_config.telemetry_sources.vast.collection_targets | default([])))
 
-- name: Check if any source targets Kafka
+- name: Check if any enabled source targets Kafka
   ansible.builtin.set_fact:
     kafka_support: true
     cacheable: true
   when: >-
-    'kafka' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([])) or
-    'kafka' in (telemetry_config.telemetry_sources.ldms.collection_targets | default([])) or
-    'kafka' in (telemetry_config.telemetry_sources.ome.collection_targets | default([]))
+    ((telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool) and
+     'kafka' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([]))) or
+    ((telemetry_config.telemetry_sources.ldms.metrics_enabled | default(false) | bool) and
+     'kafka' in (telemetry_config.telemetry_sources.ldms.collection_targets | default([]))) or
+    (((telemetry_config.telemetry_sources.ome.metrics_enabled | default(false) | bool) or
+      (telemetry_config.telemetry_sources.ome.logs_enabled | default(false) | bool)) and
+     'kafka' in (telemetry_config.telemetry_sources.ome.collection_targets | default([])))
 
 # =============================================================================
 # VECTOR BRIDGE LOGIC - Determine sink requirements based on Vector bridges
@@ -142,17 +160,17 @@
 - name: Set global variable for telemetry_enabled
   ansible.builtin.set_fact:
     telemetry_enabled: true
-  when: >
-    idrac_telemetry_support or
-    powerscale_metrics_enabled or
-    powerscale_log_enabled or
-    victoria_metrics_support or
-    victoria_logs_support or
-    ldms_support or
-    kafka_support or
-    ufm_telemetry_support or
-    ufm_log_enabled or
-    vast_telemetry_support or
-    vast_log_enabled or
-    ome_metrics_enabled or
-    ome_logs_enabled
+  when: >-  # true if any source/bridge enable flag in telemetry_config is set; missing flags count as false
+    (telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.ldms.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.powerscale.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.powerscale.logs_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.ufm.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.ufm.logs_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.vast.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.vast.logs_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.ome.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_sources.ome.logs_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_bridges.vector_ldms.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_bridges.vector_ome.metrics_enabled | default(false) | bool) or
+    (telemetry_config.telemetry_bridges.vector_ome.logs_enabled | default(false) | bool)
diff --git a/provision/roles/telemetry/tasks/main.yml b/provision/roles/telemetry/tasks/main.yml
index c513480a37..8a7e9f6ab2 100644
--- a/provision/roles/telemetry/tasks/main.yml
+++ b/provision/roles/telemetry/tasks/main.yml
@@ -27,8 +27,28 @@
 - name: Derive sink support flags from collection_targets
   ansible.builtin.include_tasks: derive_sink_support_flags.yml
 
+- name: Set pulp server facts for cloud-init templates
+  when:
+    - hostvars['localhost']['service_k8s_support'] | default(false) | bool  # unset flag counts as false
+  block:
+    - name: Run pulp status command on omnia_core container
+      ansible.builtin.command: /usr/local/bin/pulp status
+      delegate_to: localhost  # executed via delegation to localhost
+      changed_when: false  # status query only; never reported as a change
+      register: pulp_status_output
+
+    - name: Set pulp content origin value
+      ansible.builtin.set_fact:  # stdout of the status command is parsed as JSON
+        pulp_content_origin: "{{ (pulp_status_output.stdout | from_json).content_settings.content_origin }}"
+
+    - name: Set pulp_server_ip fact
+      ansible.builtin.set_fact:  # hostname component of the origin URL; NOTE(review): may not be an IP literal, confirm
+        pulp_server_ip: "{{ pulp_content_origin | urlsplit('hostname') }}"
+
 - name: Configure service_k8s telemetry services
-  when: hostvars['localhost']['service_k8s_support'] | default(false) | bool
+  when:
+    - hostvars['localhost']['service_k8s_support'] | default(false) | bool
+    - telemetry_enabled | default(false) | bool
   block:
     - name: Read telemetry packages from software config
       ansible.builtin.include_tasks: read_software_config.yml
@@ -148,8 +168,3 @@
         - telemetry_enabled | default(false) | bool
       tags:
         - telemetry_deployment
-
-    # - name: Apply telemetry configurations on upgrade
-    #   ansible.builtin.include_tasks: apply_telemetry_on_upgrade.yml
-    #   when:
-    #     - hostvars['localhost']['upgrade_enabled'] | default(false) | bool
diff --git a/provision/roles/telemetry/tasks/read_software_config.yml b/provision/roles/telemetry/tasks/read_software_config.yml
index a50607e4ed..36300d0a52 100644
--- a/provision/roles/telemetry/tasks/read_software_config.yml
+++ b/provision/roles/telemetry/tasks/read_software_config.yml
@@ -13,20 +13,6 @@
 #  limitations under the License.
 ---
 
-- name: Run pulp status command on omnia_core container
-  ansible.builtin.command: /usr/local/bin/pulp status
-  delegate_to: localhost
-  changed_when: false
-  register: pulp_status_output
-
-- name: Set pulp content origin value
-  ansible.builtin.set_fact:
-    pulp_content_origin: "{{ (pulp_status_output.stdout | from_json).content_settings.content_origin }}"
-
-- name: Set fact for pulp protocol
-  ansible.builtin.set_fact:
-    pulp_server_ip: "{{ pulp_content_origin | urlsplit('hostname') }}"
-
 - name: Get cluster_os_type from software_config.json
   ansible.builtin.set_fact:
     cluster_os_type: "{{ software_config['cluster_os_type'] }}"
diff --git a/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml b/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml
index d725cf067a..addac543ab 100644
--- a/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml
+++ b/upgrade/roles/upgrade_telemetry/tasks/apply_victoria_crs.yml
@@ -117,11 +117,7 @@
 
 - name: Display LoadBalancer IP injection status
   ansible.builtin.debug:
-    msg: >-
-      {{ victoria_lb_ips_preserved
-         if (preserved_vminsert_ip | default('') | length > 0)
-            or (preserved_vmselect_ip | default('') | length > 0)
-         else victoria_lb_ips_not_preserved }}
+    msg: "{{ victoria_lb_ip_injection_status }}"
 
 # ── Apply main CR (VMCluster only — 2.2 cluster mode only) ──
 - name: Apply VMCluster CR (cluster mode only) with retry
@@ -135,6 +131,135 @@
   delegate_to: "{{ kube_vip }}"
   connection: ssh
 
+# ── Wait for VMCluster LoadBalancer IPs and reclaim if reassigned ──
+# The operator creates vminsert/vmselect services asynchronously after the CR is applied.
+# We MUST wait for these services to get their LoadBalancer IPs BEFORE Phase 3
+# (telemetry.sh) runs, because telemetry.sh also creates VictoriaLogs services via
+# kubectl apply -k. If VL services are created before VM services exist, MetalLB
+# assigns the freed IPs to VL services, leaving VM services in <pending> state.
+#
+# If the preserved IPs got assigned to wrong services, we reclaim them:
+#   1. Find services holding the preserved IPs that are NOT vminsert/vmselect
+#   2. Delete those conflicting services to free the IPs
+#   3. Wait for vminsert/vmselect to reclaim the preserved IPs
+
+- name: Initial wait for vminsert LoadBalancer IP
+  ansible.builtin.shell: |
+    kubectl -n {{ telemetry_namespace }} get svc vminsert-{{ new_vmcluster_name }} \
+      -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo ""
+  register: vminsert_lb_ip
+  until: vminsert_lb_ip.stdout | trim | length > 0
+  retries: "{{ lb_ip_wait_retries }}"
+  delay: "{{ lb_ip_wait_delay }}"
+  changed_when: false
+  ignore_errors: true  # exhausted until-retries fail the task even with failed_when: false
+  delegate_to: "{{ kube_vip }}"
+  connection: ssh
+
+- name: Initial wait for vmselect LoadBalancer IP
+  ansible.builtin.shell: |
+    kubectl -n {{ telemetry_namespace }} get svc vmselect-{{ new_vmcluster_name }} \
+      -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo ""
+  register: vmselect_lb_ip
+  until: vmselect_lb_ip.stdout | trim | length > 0
+  retries: "{{ lb_ip_wait_retries }}"
+  delay: "{{ lb_ip_wait_delay }}"
+  changed_when: false
+  ignore_errors: true  # a missing IP is handled by the reclaim block below
+  delegate_to: "{{ kube_vip }}"
+  connection: ssh
+
+# ── Reclaim reassigned IPs if VMCluster services are still pending ──
+- name: Reclaim preserved IPs from conflicting services
+  when:
+    - preserved_vminsert_ip | default('') | length > 0 or preserved_vmselect_ip | default('') | length > 0
+    - vminsert_lb_ip.stdout | trim | length == 0 or vmselect_lb_ip.stdout | trim | length == 0
+  block:
+    - name: Stage IP conflict detection script
+      ansible.builtin.template:
+        src: find_ip_conflict_svcs.sh.j2
+        dest: "{{ ip_conflict_script_path }}"
+        mode: "{{ executable_mode }}"
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    - name: Find services holding preserved IPs that are not VMCluster services
+      ansible.builtin.command: "{{ ip_conflict_script_path }}"
+      register: ip_conflict_svcs
+      changed_when: false
+      failed_when: false
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    - name: Remove IP conflict detection script
+      ansible.builtin.file:
+        path: "{{ ip_conflict_script_path }}"
+        state: absent
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    - name: Display services holding preserved IPs
+      ansible.builtin.debug:
+        msg: "{{ victoria_lb_ip_conflict_svcs_found }}"
+      when: ip_conflict_svcs.stdout_lines | default([]) | select() | list | length > 0
+
+    - name: Delete conflicting services holding preserved IPs
+      ansible.builtin.command:
+        cmd: kubectl -n {{ telemetry_namespace }} delete svc {{ item }} --timeout=30s
+      loop: "{{ ip_conflict_svcs.stdout_lines | default([]) | select() | list }}"
+      changed_when: true
+      failed_when: false
+      when: ip_conflict_svcs.stdout_lines | default([]) | select() | list | length > 0
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    # Register the reclaim waits under *_reclaim names: Ansible registers a result
+    # even for skipped tasks, so reusing vminsert_lb_ip/vmselect_lb_ip here would
+    # replace the initial results with a stdout-less "skipped" dict and break the
+    # block condition above plus the summary messages that read .stdout.
+    - name: Wait for vminsert to reclaim preserved IP
+      ansible.builtin.shell: |
+        kubectl -n {{ telemetry_namespace }} get svc vminsert-{{ new_vmcluster_name }} \
+          -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo ""
+      register: vminsert_lb_ip_reclaim
+      until: vminsert_lb_ip_reclaim.stdout | trim | length > 0
+      retries: "{{ lb_ip_wait_retries }}"
+      delay: "{{ lb_ip_wait_delay }}"
+      changed_when: false
+      ignore_errors: true  # still pending is reported by the warning task, not a failure
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    - name: Wait for vmselect to reclaim preserved IP
+      ansible.builtin.shell: |
+        kubectl -n {{ telemetry_namespace }} get svc vmselect-{{ new_vmcluster_name }} \
+          -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo ""
+      register: vmselect_lb_ip_reclaim
+      until: vmselect_lb_ip_reclaim.stdout | trim | length > 0
+      retries: "{{ lb_ip_wait_retries }}"
+      delay: "{{ lb_ip_wait_delay }}"
+      changed_when: false
+      ignore_errors: true  # still pending is reported by the warning task, not a failure
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    # Runs only when the block ran, so both *_reclaim results carry stdout.
+    - name: Adopt post-reclaim LoadBalancer IP results
+      ansible.builtin.set_fact:
+        vminsert_lb_ip: "{{ vminsert_lb_ip_reclaim }}"
+        vmselect_lb_ip: "{{ vmselect_lb_ip_reclaim }}"
+
+- name: Display confirmed LoadBalancer IPs
+  ansible.builtin.debug:
+    msg: "{{ victoria_lb_ip_confirmed }}"
+
+- name: Warn if LoadBalancer IPs still not assigned after reclaim
+  ansible.builtin.debug:
+    msg: "{{ victoria_lb_ip_reclaim_failed }}"
+  when: >-
+    (vminsert_lb_ip is defined and vminsert_lb_ip.stdout is defined and vminsert_lb_ip.stdout | trim | length == 0) or
+    (vmselect_lb_ip is defined and vmselect_lb_ip.stdout is defined and vmselect_lb_ip.stdout | trim | length == 0)
+
 # ── Apply scrape and agent CRs ──
 - name: Check for VMScrape manifest
   ansible.builtin.stat:
diff --git a/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml b/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml
index 23648e2ed6..b290bc639a 100644
--- a/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml
+++ b/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml
@@ -252,6 +252,8 @@
     # ── Cleanup old pre-operator services and deployments ──
     # The operator creates new services with different names (e.g. vminsert-victoria-cluster),
     # so the old standalone services become stale and waste LoadBalancer IPs.
+    # Old services MUST be deleted BEFORE applying VMCluster CR so MetalLB can
+    # assign the same IPs to the new operator-managed services via loadBalancerIP.
     - name: Find old pre-operator services
       ansible.builtin.shell: |
         set -o pipefail
@@ -285,6 +287,5 @@
 
     - name: Display old resource cleanup summary
       ansible.builtin.debug:
-        msg:
-          - "Old services deleted: {{ old_services.stdout_lines | default([]) | select() | list }}"
-          - "Old vmagent deployment cleanup attempted: {{ old_vmagent_deployment }}"
+        msg: "{{ victoria_old_svc_cleanup_summary }}"
+        verbosity: 2
diff --git a/upgrade/roles/upgrade_telemetry/templates/find_ip_conflict_svcs.sh.j2 b/upgrade/roles/upgrade_telemetry/templates/find_ip_conflict_svcs.sh.j2
new file mode 100644
index 0000000000..c21217afad
--- /dev/null
+++ b/upgrade/roles/upgrade_telemetry/templates/find_ip_conflict_svcs.sh.j2
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Find services in the telemetry namespace that are holding LoadBalancer IPs
+# which should belong to VMCluster services (vminsert/vmselect).
+# This can happen when MetalLB reassigns freed IPs to other services
+# before the VMCluster services are created by the operator.
+#
+# Usage: bash find_ip_conflict_svcs.sh
+# Output: One service name per line (services holding conflicting IPs)
+
+set -o pipefail  # pipeline status reflects a failing kubectl, not only python3
+
+PRESERVED_IPS="{{ preserved_vminsert_ip | default('') }} {{ preserved_vmselect_ip | default('') }}"  # space-separated; either entry may be empty
+VMCLUSTER_SVCS="vminsert-{{ new_vmcluster_name }} vmselect-{{ new_vmcluster_name }}"  # services the scan below skips
+
+kubectl -n {{ telemetry_namespace }} get svc -o json 2>/dev/null | \
+  python3 -c "
+import json, sys
+data = json.load(sys.stdin)
+preserved = set('${PRESERVED_IPS}'.split())
+vmcluster = set('${VMCLUSTER_SVCS}'.split())
+for svc in data.get('items', []):
+    name = svc['metadata']['name']
+    if name in vmcluster:
+        continue
+    ingress = svc.get('status', {}).get('loadBalancer', {}).get('ingress', [])
+    for ing in ingress:
+        ip = ing.get('ip', '')
+        if ip in preserved:
+            print(name)
+            break
+" || true  # best-effort: a kubectl or python3 failure must not fail the script
diff --git a/upgrade/roles/upgrade_telemetry/vars/main.yml b/upgrade/roles/upgrade_telemetry/vars/main.yml
index a869e5a52a..5d51a1a057 100644
--- a/upgrade/roles/upgrade_telemetry/vars/main.yml
+++ b/upgrade/roles/upgrade_telemetry/vars/main.yml
@@ -45,6 +45,11 @@ pod_wait_delay: 15
 idrac_rollout_retries: 3
 idrac_rollout_delay: 30
 
+# LoadBalancer IP wait configuration
+lb_ip_wait_retries: 30
+lb_ip_wait_delay: 5
+ip_conflict_script_path: /tmp/find_ip_conflict_svcs.sh
+
 # Victoria operator configuration
 # victoria_operator_pkg is loaded dynamically from service_k8s JSON in include_required_input.yml
 victoria_operator_release_name: victoria-metrics-operator
@@ -105,7 +110,34 @@ victoria_lb_ips_preserved: >-
   LoadBalancer IPs injected into VMCluster manifest -
   vminsert: {{ preserved_vminsert_ip | default('N/A') }},
   vmselect: {{ preserved_vmselect_ip | default('N/A') }}
+victoria_lb_ip_injection_status: >-
+  {{ victoria_lb_ips_preserved
+     if (preserved_vminsert_ip | default('') | length > 0)
+        or (preserved_vmselect_ip | default('') | length > 0)
+     else victoria_lb_ips_not_preserved }}
 victoria_lb_ips_not_preserved: "No old LoadBalancer IPs found to preserve (fresh deploy or already operator-managed)"
+victoria_lb_ip_confirmed: >-  # shows PENDING when stdout is missing, empty or whitespace-only
+  VMCluster LoadBalancer IPs confirmed -
+  vminsert-{{ new_vmcluster_name }}: {{ vminsert_lb_ip.stdout | default('') | trim | default('PENDING', true) }},
+  vmselect-{{ new_vmcluster_name }}: {{ vmselect_lb_ip.stdout | default('') | trim | default('PENDING', true) }}
+victoria_lb_ip_reclaim_needed: >-
+  VMCluster services still pending after initial wait.
+  Checking if preserved IPs were assigned to wrong services...
+victoria_lb_ip_conflict_svcs_found: >-
+  Services holding preserved IPs (will be deleted and re-created by telemetry.sh):
+  {{ ip_conflict_svcs.stdout_lines | default([]) | select() | list }}
+victoria_lb_ip_reclaim_success: >-  # shows PENDING when stdout is missing, empty or whitespace-only
+  Successfully reclaimed preserved IPs for VMCluster services -
+  vminsert-{{ new_vmcluster_name }}: {{ vminsert_lb_ip.stdout | default('') | trim | default('PENDING', true) }},
+  vmselect-{{ new_vmcluster_name }}: {{ vmselect_lb_ip.stdout | default('') | trim | default('PENDING', true) }}
+victoria_lb_ip_reclaim_failed: >-  # shows NONE for a service whose stdout is missing, empty or whitespace-only
+  WARNING: VMCluster services still do not have LoadBalancer IPs after reclaim attempt.
+  vminsert: {{ vminsert_lb_ip.stdout | default('') | trim | default('NONE', true) }},
+  vmselect: {{ vmselect_lb_ip.stdout | default('') | trim | default('NONE', true) }}.
+  Please use new assigned IPs.
+victoria_old_svc_cleanup_summary: >-
+  Old services deleted: {{ old_services.stdout_lines | default([]) | select() | list }}.
+  Old vmagent deployment cleanup attempted: {{ old_vmagent_deployment }}
 victoria_pods_not_ready: "Telemetry upgrade FAILED: Some pods are not ready. {{ pods_not_ready.stdout | int }} pod(s) not in Running state."
 victoria_pods_ready_after_wait: "All telemetry pods are ready after waiting"
 telemetry_upgrade_success: "Telemetry upgrade COMPLETED: All telemetry pods are running and ready."