diff --git a/discovery/roles/ome_discovery/tasks/generate_discovery_report.yml b/discovery/roles/ome_discovery/tasks/generate_discovery_report.yml
index f003ea40fb..686f5fee2c 100644
--- a/discovery/roles/ome_discovery/tasks/generate_discovery_report.yml
+++ b/discovery/roles/ome_discovery/tasks/generate_discovery_report.yml
@@ -56,10 +56,7 @@
       - ""
       - "3. Update HOSTNAME, FUNCTIONAL_GROUP_NAME, GROUP_NAME as needed."
       - ""
-      - "4. Update the following parameter in provision_config.yml:"
-      - "     pxe_mapping_file_path: {{ pxe_mapping_output_file }}"
-      - ""
-      - "5. Run:"
+      - "4. Run:"
       - "     ansible-playbook provision/provision.yml"
       - "============================================================"
 
@@ -84,12 +81,12 @@
       - ""
       - "3. Update HOSTNAME, FUNCTIONAL_GROUP_NAME, GROUP_NAME as needed."
       - ""
-      - "4. Update the following parameter in provision_config.yml:"
-      - "     pxe_mapping_file_path: {{ pxe_mapping_output_file }}"
-      - ""
-      - "5. If using BuildStream, manually copy the PXE mapping file to GitLab:"
-      - "     input/pxe_mapping_file.csv"
+      - "4. If GitLab server is not yet up, copy the generated file to"
+      - "     /opt/omnia/input/project_default/pxe_mapping_file.csv in the omnia_core container"
       - ""
-      - "6. Run:"
-      - "     ansible-playbook provision/provision.yml"
+      - "5. If the GitLab server is up and running, copy the file to"
+      - "     input/pxe_mapping_file.csv in the GitLab project and commit the changes"
+      - "     after building the images using the build pipeline. Committing the PXE mapping"
+      - "     file will automatically trigger the deploy pipeline and deploy the images on"
+      - "     the nodes listed in the newly committed PXE mapping file."
       - "============================================================"
diff --git a/omnia.sh b/omnia.sh
index 353aa9b5ca..21e6f7ce0b 100755
--- a/omnia.sh
+++ b/omnia.sh
@@ -1657,6 +1657,123 @@ phase2_approval() {
     return 0
 }
 
+# ═══════════════════════════════════════════════════════════════════════════
+# validate_backup_disk_space: Pre-upgrade disk space validation
+# Ensures sufficient space exists before backup creation to prevent partial
+# backups due to disk full conditions.
+# ═══════════════════════════════════════════════════════════════════════════
+validate_backup_disk_space() {
+    local backup_base="$1"
+    local safety_multiplier=2  # Require 2× the estimated backup size
+
+    echo "[INFO] [ORCHESTRATOR] Validating disk space for backup..."
+
+    if ! podman ps --format '{{.Names}}' | grep -qw "omnia_core"; then
+        echo "[ERROR] [ORCHESTRATOR] Cannot validate disk space: omnia_core container not running"
+        return 1
+    fi
+
+    # Calculate size of data to be backed up (in KB)
+    local input_size=0
+    local openchami_size=0
+    local metadata_size=0
+
+    # Get input directory size
+    input_size=$(podman exec -u root omnia_core bash -c "
+        if [ -d '$CONTAINER_INPUT_DIR' ]; then
+            du -sk '$CONTAINER_INPUT_DIR' 2>/dev/null | cut -f1
+        else
+            echo 0
+        fi
+    " 2>/dev/null || echo 0)
+
+    # Get OpenCHAMI directory size
+    openchami_size=$(podman exec -u root omnia_core bash -c "
+        if [ -d '/opt/omnia/openchami' ]; then
+            du -sk '/opt/omnia/openchami' 2>/dev/null | cut -f1
+        else
+            echo 0
+        fi
+    " 2>/dev/null || echo 0)
+
+    # Get metadata file size (small, but include for completeness)
+    metadata_size=$(podman exec -u root omnia_core bash -c "
+        if [ -f '$CONTAINER_METADATA_FILE' ]; then
+            du -sk '$CONTAINER_METADATA_FILE' 2>/dev/null | cut -f1
+        else
+            echo 0
+        fi
+    " 2>/dev/null || echo 0)
+
+    # Ensure values are numeric
+    input_size=${input_size:-0}
+    openchami_size=${openchami_size:-0}
+    metadata_size=${metadata_size:-0}
+
+    # Calculate total estimated backup size
+    local total_backup_size_kb=$((input_size + openchami_size + metadata_size))
+    
+    # Add buffer for quadlet files from host (~100KB typical)
+    total_backup_size_kb=$((total_backup_size_kb + 100))
+
+    # Calculate required space with safety multiplier
+    local required_space_kb=$((total_backup_size_kb * safety_multiplier))
+
+    # Convert to human-readable for display
+    local total_backup_size_mb=$((total_backup_size_kb / 1024))
+    local required_space_mb=$((required_space_kb / 1024))
+
+    echo "[INFO] [ORCHESTRATOR] Estimated backup size: ${total_backup_size_mb}MB"
+    echo "[INFO] [ORCHESTRATOR] Required space (${safety_multiplier}× safety margin): ${required_space_mb}MB"
+
+    # Get available space on backup destination filesystem
+    # The backup path is inside the container, which maps to the omnia share
+    local backup_parent_dir
+    backup_parent_dir=$(dirname "$backup_base")
+
+    local available_space_kb
+    available_space_kb=$(podman exec -u root omnia_core bash -c "
+        # Ensure parent directory exists for df check
+        mkdir -p '$backup_parent_dir' 2>/dev/null || true
+        df -k '$backup_parent_dir' 2>/dev/null | tail -1 | awk '{print \$4}'
+    " 2>/dev/null)
+
+    if [ -z "$available_space_kb" ] || ! [[ "$available_space_kb" =~ ^[0-9]+$ ]]; then
+        echo "[WARN] [ORCHESTRATOR] Could not determine available disk space; proceeding with backup"
+        return 0
+    fi
+
+    local available_space_mb=$((available_space_kb / 1024))
+    echo "[INFO] [ORCHESTRATOR] Available space on backup filesystem: ${available_space_mb}MB"
+
+    # Check if sufficient space is available
+    if [ "$available_space_kb" -lt "$required_space_kb" ]; then
+        echo ""
+        echo -e "${RED}═══════════════════════════════════════════════════════════════════════════${NC}"
+        echo -e "${RED}                    INSUFFICIENT DISK SPACE FOR BACKUP${NC}"
+        echo -e "${RED}═══════════════════════════════════════════════════════════════════════════${NC}"
+        echo ""
+        echo -e "${RED}ERROR: Not enough disk space to safely create upgrade backup.${NC}"
+        echo ""
+        echo -e "${YELLOW}Disk Space Summary:${NC}"
+        echo -e "${YELLOW}  - Estimated backup size:  ${total_backup_size_mb}MB${NC}"
+        echo -e "${YELLOW}  - Required space (${safety_multiplier}×):    ${required_space_mb}MB${NC}"
+        echo -e "${YELLOW}  - Available space:        ${available_space_mb}MB${NC}"
+        echo -e "${YELLOW}  - Shortfall:              $((required_space_mb - available_space_mb))MB${NC}"
+        echo ""
+        echo -e "${YELLOW}Backup destination: $backup_base${NC}"
+        echo ""
+        echo -e "${GREEN}Required Action:${NC}"
+        echo -e "${GREEN}  1. Free up at least $((required_space_mb - available_space_mb))MB on the Omnia share${NC}"
+        echo -e "${GREEN}  2. Re-run 'omnia.sh --upgrade' after freeing space${NC}"
+        echo ""
+        return 1
+    fi
+
+    echo "[INFO] [ORCHESTRATOR] Disk space validation passed"
+    return 0
+}
+
 backup_openchami_data() {
     local backup_base="$1"
 
@@ -1668,11 +1785,14 @@ backup_openchami_data() {
         return 0
     fi
 
-    # Create openchami backup directory structure
+    # Create openchami backup directory structure with secure permissions
     if ! podman exec -u root omnia_core bash -c "
         set -e
         mkdir -p '${backup_base%/}/openchami/openchami_data'
+        chmod 0700 '${backup_base%/}/openchami'
         cp -a /opt/omnia/openchami/. '${backup_base%/}/openchami/openchami_data/' 2>&1
+        find '${backup_base%/}/openchami/openchami_data' -type f -exec chmod 0600 {} +
+        find '${backup_base%/}/openchami/openchami_data' -type d -exec chmod 0700 {} +
     "; then
         echo "[WARN] [ORCHESTRATOR] Failed to backup OpenCHAMI data — upgrade will continue"
         return 0
@@ -1689,19 +1809,33 @@ backup_openchami_data() {
     if [ -f "/etc/systemd/system/openchami.target" ]; then
         podman cp "/etc/systemd/system/openchami.target" \
             "omnia_core:${backup_base%/}/openchami/openchami.target" >/dev/null 2>&1 || true
+        podman exec -u root omnia_core chmod 0600 "${backup_base%/}/openchami/openchami.target" 2>/dev/null || true
         echo "[INFO] [ORCHESTRATOR] openchami.target backed up"
     fi
 
     # Backup quadlet .container files from host (if they exist)
     if ls /etc/containers/systemd/*.container >/dev/null 2>&1; then
-        podman exec -u root omnia_core mkdir -p "${backup_base%/}/openchami/quadlets" 2>/dev/null || true
+        podman exec -u root omnia_core bash -c "mkdir -p '${backup_base%/}/openchami/quadlets' && chmod 0700 '${backup_base%/}/openchami/quadlets'" 2>/dev/null || true
         for qfile in /etc/containers/systemd/*.container; do
             podman cp "$qfile" \
                 "omnia_core:${backup_base%/}/openchami/quadlets/$(basename "$qfile")" >/dev/null 2>&1 || true
+            podman exec -u root omnia_core chmod 0600 "${backup_base%/}/openchami/quadlets/$(basename "$qfile")" 2>/dev/null || true
         done
         echo "[INFO] [ORCHESTRATOR] Quadlet .container files backed up"
     fi
 
+    # Backup quadlet .network files from host (if they exist)
+    # These define Podman networks that enable DNS resolution between containers
+    if ls /etc/containers/systemd/*.network >/dev/null 2>&1; then
+        podman exec -u root omnia_core bash -c "mkdir -p '${backup_base%/}/openchami/quadlets' && chmod 0700 '${backup_base%/}/openchami/quadlets'" 2>/dev/null || true
+        for nfile in /etc/containers/systemd/*.network; do
+            podman cp "$nfile" \
+                "omnia_core:${backup_base%/}/openchami/quadlets/$(basename "$nfile")" >/dev/null 2>&1 || true
+            podman exec -u root omnia_core chmod 0600 "${backup_base%/}/openchami/quadlets/$(basename "$nfile")" 2>/dev/null || true
+        done
+        echo "[INFO] [ORCHESTRATOR] Quadlet .network files backed up"
+    fi
+
     echo "[INFO] [ORCHESTRATOR] OpenCHAMI data backup completed: ${backup_base}/openchami/"
     return 0
 }
@@ -1725,13 +1859,17 @@ phase3_backup_creation() {
         set -e
         rm -rf '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs'
         mkdir -p '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs'
+        chmod 0700 '${backup_base%/}' '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs'
 
         if [ -f '$CONTAINER_INPUT_DIR/default.yml' ]; then
             cp -a '$CONTAINER_INPUT_DIR/default.yml' '${backup_base%/}/input/'
+            chmod 0600 '${backup_base%/}/input/default.yml'
         fi
 
         if [ -d '$CONTAINER_INPUT_DIR/project_default' ]; then
             cp -a '$CONTAINER_INPUT_DIR/project_default' '${backup_base%/}/input/'
+            find '${backup_base%/}/input/project_default' -type f -exec chmod 0600 {} +
+            find '${backup_base%/}/input/project_default' -type d -exec chmod 0700 {} +
         fi
 
         if [ ! -f '$CONTAINER_METADATA_FILE' ]; then
@@ -1739,6 +1877,7 @@ phase3_backup_creation() {
             exit 1
         fi
         cp -a '$CONTAINER_METADATA_FILE' '${backup_base%/}/metadata/oim_metadata.yml'
+        chmod 0600 '${backup_base%/}/metadata/oim_metadata.yml'
     "; then
         echo "[ERROR] [ORCHESTRATOR] Backup failed; cleaning up partial backup"
         podman exec -u root omnia_core bash -c "rm -rf '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs'" >/dev/null 2>&1 || true
@@ -1751,6 +1890,7 @@ phase3_backup_creation() {
             podman exec -u root omnia_core bash -c "rm -rf '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs'" >/dev/null 2>&1 || true
             return 1
         fi
+        podman exec -u root omnia_core chmod 0600 "${backup_base%/}/configs/omnia_core.container" 2>/dev/null || true
     fi
 
     echo "[INFO] [ORCHESTRATOR] Backup created at: $backup_base"
@@ -2104,6 +2244,12 @@ upgrade_omnia_core() {
         exit 1
     fi
 
+    # Validate disk space before backup creation
+    if ! validate_backup_disk_space "$backup_base"; then
+        echo "[ERROR] [ORCHESTRATOR] Upgrade aborted: Insufficient disk space for backup"
+        exit 1
+    fi
+
     if ! phase3_backup_creation "$backup_base"; then
         echo "[ERROR] [ORCHESTRATOR] Upgrade failed in Phase 3"
         exit 1
@@ -2302,6 +2448,94 @@ rollback_omnia_core() {
         echo -e "${RED}ERROR: Omnia core container is not running.${NC}"
         exit 1
     fi
+
+    # ═══════════════════════════════════════════════════════════════════════════
+    # SAFETY CHECK: Prevent core container rollback if upgrade.yml was run but
+    # rollback.yml has not completed successfully inside the container.
+    # This prevents inconsistent state where core is 2.1 but other components are 2.2.
+    # ═══════════════════════════════════════════════════════════════════════════
+    local upgrade_manifest_path="/opt/omnia/.data/upgrade_manifest.yml"
+    local rollback_manifest_path="/opt/omnia/.data/rollback_manifest.yml"
+
+    # Check if upgrade_manifest.yml exists (indicates upgrade process was started)
+    if podman exec -u root omnia_core test -f "$upgrade_manifest_path" 2>/dev/null; then
+        echo "[INFO] [ROLLBACK] Checking upgrade state before proceeding..."
+
+        # Read component statuses: up to 8 indented "key:" lines from the 20 lines after component_status:
+        local component_statuses
+        component_statuses=$(podman exec -u root omnia_core grep -A20 'component_status:' "$upgrade_manifest_path" 2>/dev/null | grep -E '^\s+\w+:' | head -8)
+        # Any status other than "pending" counts as upgraded. NOTE(review): if no status line
+        # was read, echo's empty line also passes grep -v, so this fails closed — confirm intended.
+        local has_upgraded_components=false
+        if echo "$component_statuses" | grep -qvE ':\s*"?pending"?\s*$'; then
+            has_upgraded_components=true
+        fi
+
+        if [ "$has_upgraded_components" = true ]; then
+            echo "[INFO] [ROLLBACK] Detected upgraded components. Checking rollback.yml completion status..."
+
+            # Components have been upgraded - check if rollback.yml completed successfully
+            if podman exec -u root omnia_core test -f "$rollback_manifest_path" 2>/dev/null; then
+                local rollback_status
+                rollback_status=$(podman exec -u root omnia_core grep '^rollback_status:' "$rollback_manifest_path" 2>/dev/null | cut -d':' -f2 | tr -d ' \t\n\r"')
+
+                if [ "$rollback_status" != "completed" ]; then
+                    echo ""
+                    echo -e "${RED}═══════════════════════════════════════════════════════════════════════════${NC}"
+                    echo -e "${RED}                    CORE CONTAINER ROLLBACK BLOCKED${NC}"
+                    echo -e "${RED}═══════════════════════════════════════════════════════════════════════════${NC}"
+                    echo ""
+                    echo -e "${RED}ERROR: Cannot rollback core container at this time.${NC}"
+                    echo ""
+                    echo -e "${YELLOW}Reason: upgrade.yml has upgraded components, but rollback.yml has not${NC}"
+                    echo -e "${YELLOW}        completed successfully inside the container.${NC}"
+                    echo ""
+                    echo -e "${YELLOW}Current rollback status: ${rollback_status:-unknown}${NC}"
+                    echo ""
+                    echo -e "${YELLOW}Rolling back the core container now would leave your cluster in an${NC}"
+                    echo -e "${YELLOW}inconsistent state where:${NC}"
+                    echo -e "${YELLOW}  - Core container: 2.1 (rolled back)${NC}"
+                    echo -e "${YELLOW}  - Other components: 2.2 (not rolled back)${NC}"
+                    echo ""
+                    echo -e "${GREEN}Required Action:${NC}"
+                    echo -e "${GREEN}  1. First run rollback.yml inside the container to rollback all components${NC}"
+                    echo -e "${GREEN}  2. Wait for rollback.yml to complete successfully${NC}"
+                    echo -e "${GREEN}  3. Then run 'omnia.sh --rollback' to rollback the core container${NC}"
+                    echo ""
+                    exit 1
+                fi
+                echo "[INFO] [ROLLBACK] Rollback playbook completed successfully. Proceeding with core container rollback."
+            else
+                # Rollback manifest doesn't exist but components were upgraded
+                echo ""
+                echo -e "${RED}═══════════════════════════════════════════════════════════════════════════${NC}"
+                echo -e "${RED}                    CORE CONTAINER ROLLBACK BLOCKED${NC}"
+                echo -e "${RED}═══════════════════════════════════════════════════════════════════════════${NC}"
+                echo ""
+                echo -e "${RED}ERROR: Cannot rollback core container at this time.${NC}"
+                echo ""
+                echo -e "${YELLOW}Reason: upgrade.yml has upgraded components, but rollback.yml has not${NC}"
+                echo -e "${YELLOW}        been executed inside the container.${NC}"
+                echo ""
+                echo -e "${YELLOW}Rolling back the core container now would leave your cluster in an${NC}"
+                echo -e "${YELLOW}inconsistent state where:${NC}"
+                echo -e "${YELLOW}  - Core container: 2.1 (rolled back)${NC}"
+                echo -e "${YELLOW}  - Other components: 2.2 (not rolled back)${NC}"
+                echo ""
+                echo -e "${GREEN}Required Action:${NC}"
+                echo -e "${GREEN}  1. First run rollback.yml inside the container to rollback all components${NC}"
+                echo -e "${GREEN}  2. Wait for rollback.yml to complete successfully${NC}"
+                echo -e "${GREEN}  3. Then run 'omnia.sh --rollback' to rollback the core container${NC}"
+                echo ""
+                exit 1
+            fi
+        else
+            echo "[INFO] [ROLLBACK] No components upgraded yet. Core container rollback is safe to proceed."
+        fi
+    else
+        echo "[INFO] [ROLLBACK] No upgrade manifest found. Core container rollback is safe to proceed."
+    fi
+    # ═══════════════════════════════════════════════════════════════════════════
     
     # Create lock file to prevent concurrent rollbacks
     local lock_file="/tmp/omnia_rollback.lock"
diff --git a/provision/roles/configure_ochami/templates/hpc_tools/install_dcgm.sh.j2 b/provision/roles/configure_ochami/templates/hpc_tools/install_dcgm.sh.j2
index 158e089805..a1e768d9bf 100644
--- a/provision/roles/configure_ochami/templates/hpc_tools/install_dcgm.sh.j2
+++ b/provision/roles/configure_ochami/templates/hpc_tools/install_dcgm.sh.j2
@@ -31,18 +31,12 @@ echo "====================================================="
 # Detect CUDA major version for DCGM package selection
 echo "[INFO] Detecting CUDA version for DCGM package compatibility..."
 # Try to get CUDA version from nvidia-smi
-CUDA_VERSION=$(nvidia-smi | grep "CUDA Version" | awk '{print $9}' | cut -d'.' -f1)
+CUDA_VERSION=$(nvidia-smi | sed -nE 's/.*CUDA( UMD)? Version: *([0-9]+).*/\2/p')
 
-# Fallback: Try to get CUDA version from nvcc if available
 if [ -z "$CUDA_VERSION" ]; then
-    if command -v nvcc &>/dev/null; then
-        CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $5}' | cut -d',' -f1 | cut -d'.' -f1)
-        echo "[INFO] CUDA version detected from nvcc: $CUDA_VERSION"
-    else
-        echo "[ERROR] Could not detect CUDA version from nvidia-smi or nvcc."
-        echo "[ERROR] CUDA toolkit is required for DCGM package version detection. Skipping DCGM setup."
+        echo "[ERROR] Could not detect CUDA version from nvidia-smi"
+        echo "[ERROR] CUDA driver is required for DCGM package version detection. Skipping DCGM setup."
         exit 1
-    fi
 else
     echo "[INFO] CUDA major version detected from nvidia-smi: $CUDA_VERSION"
 fi
diff --git a/provision/roles/telemetry/templates/telemetry/idrac_telemetry/idrac_telemetry_statefulset.yaml.j2 b/provision/roles/telemetry/templates/telemetry/idrac_telemetry/idrac_telemetry_statefulset.yaml.j2
index b0c3dd8b3c..7d56e91d56 100644
--- a/provision/roles/telemetry/templates/telemetry/idrac_telemetry/idrac_telemetry_statefulset.yaml.j2
+++ b/provision/roles/telemetry/templates/telemetry/idrac_telemetry/idrac_telemetry_statefulset.yaml.j2
@@ -72,7 +72,7 @@ spec:
         - ip: "127.0.0.1"
           hostnames:
             - "mysqldb"
-      terminationGracePeriodSeconds: 10
+      terminationGracePeriodSeconds: 120
       tolerations:
       - effect: NoExecute
         key: node.kubernetes.io/not-ready
diff --git a/rollback/playbooks/rollback_slurm.yml b/rollback/playbooks/rollback_slurm.yml
index 14a67eaa85..abec6bbbe6 100644
--- a/rollback/playbooks/rollback_slurm.yml
+++ b/rollback/playbooks/rollback_slurm.yml
@@ -12,6 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ---
+
+# ============================================================================
+# Play 1: Pre-flight — manifest gating, BuildStream terminal gate
+# ============================================================================
 - name: Rollback Slurm feature updates
   hosts: localhost
   connection: local
@@ -36,66 +40,83 @@
     - name: Read rollback_manifest.yml
       ansible.builtin.include_vars:
         file: "{{ rollback_manifest_path }}"
-        name: rollback_manifest
+        name: manifest
 
-    - name: Skip if slurm already rolled back
-      ansible.builtin.meta: end_play
+    - name: Initialize slurm_skip
+      ansible.builtin.set_fact:
+        slurm_skip: false
+
+    - name: Set slurm_skip when already completed
+      ansible.builtin.set_fact:
+        slurm_skip: true
       when:
-        - rollback_manifest.component_status[component_name] | default('pending') == 'completed'
+        - manifest.component_status[component_name] | default('pending') == 'completed'
 
     - name: "Mark as skipped — BuildStream terminal gate active (C-24)"
       ansible.builtin.copy:
         content: >-
-          {{ rollback_manifest | combine({
-               'component_status': rollback_manifest.component_status | combine({
+          {{ manifest | combine({
+               'component_status': manifest.component_status | combine({
                  component_name: 'skipped'
                })
              }) | to_nice_yaml }}
         dest: "{{ rollback_manifest_path }}"
         mode: '0644'
+      when:
+        - not slurm_skip
+        - hostvars['localhost']['build_stream_terminal'] | default(false) | bool
+        - manifest.component_status.build_stream | default('pending') == 'completed'
+
+    - name: "Set slurm_skip — BuildStream terminal gate active (C-24)"
+      ansible.builtin.set_fact:
+        slurm_skip: true
       when:
         - hostvars['localhost']['build_stream_terminal'] | default(false) | bool
+        - manifest.component_status.build_stream | default('pending') == 'completed'
 
     - name: "Skip — BuildStream terminal gate active (C-24)"
       ansible.builtin.meta: end_play
       when:
         - hostvars['localhost']['build_stream_terminal'] | default(false) | bool
 
-    - name: Set slurm rollback status to in-progress
-      ansible.builtin.copy:
-        content: >-
-          {{ rollback_manifest | combine({
-               'component_status': rollback_manifest.component_status | combine({
-                 component_name: 'in-progress'
-               })
-             }) | to_nice_yaml }}
-        dest: "{{ rollback_manifest_path }}"
-        mode: '0644'
+    - name: Run slurm rollback pre-flight steps unless slurm is skipped
+      when: not slurm_skip
+      block:
+        - name: Set slurm rollback status to in-progress
+          ansible.builtin.copy:
+            content: >-
+              {{ manifest | combine({
+                  'component_status': manifest.component_status | combine({
+                    component_name: 'in-progress'
+                  })
+                }) | to_nice_yaml }}
+            dest: "{{ rollback_manifest_path }}"
+            mode: '0644'
 
-    - name: "Display rollback status in-progress — {{ component_name }}"
-      ansible.builtin.debug:
-        msg: "[ROLLBACK] Component '{{ component_name }}' — status changed to: in-progress"
+        - name: "Display rollback status in-progress — {{ component_name }}"
+          ansible.builtin.debug:
+            msg: "[ROLLBACK] Component '{{ component_name }}' — status changed to: in-progress"
 
-    - name: Check for existing reboot state file
-      ansible.builtin.stat:
-        path: /opt/omnia/.data/slurm_rollback_reboot_state.yml
-      register: _reboot_state_stat
+        - name: Check for existing reboot state file
+          ansible.builtin.stat:
+            path: /opt/omnia/.data/slurm_rollback_reboot_state.yml
+          register: _reboot_state_stat
 
-    - name: Load reboot state from previous run
-      ansible.builtin.include_vars:
-        file: /opt/omnia/.data/slurm_rollback_reboot_state.yml
-        name: _reboot_state
-      when: _reboot_state_stat.stat.exists | default(false)
+        - name: Load reboot state from previous run
+          ansible.builtin.include_vars:
+            file: /opt/omnia/.data/slurm_rollback_reboot_state.yml
+            name: _reboot_state
+          when: _reboot_state_stat.stat.exists | default(false)
 
-    - name: Set previously successful reboot list
-      ansible.builtin.set_fact:
-        slurm_previously_rebooted: "{{ _reboot_state.successfully_rebooted | default([]) }}"
-      when: _reboot_state_stat.stat.exists | default(false)
+        - name: Set previously successful reboot list
+          ansible.builtin.set_fact:
+            slurm_previously_rebooted: "{{ _reboot_state.successfully_rebooted | default([]) }}"
+          when: _reboot_state_stat.stat.exists | default(false)
 
-    - name: Initialize previously rebooted list (no prior state)
-      ansible.builtin.set_fact:
-        slurm_previously_rebooted: []
-      when: not (_reboot_state_stat.stat.exists | default(false))
+        - name: Initialize previously rebooted list (no prior state)
+          ansible.builtin.set_fact:
+            slurm_previously_rebooted: []
+          when: not (_reboot_state_stat.stat.exists | default(false))
 
 - name: Create OIM host group for cloud-init/BSS update
   ansible.builtin.import_playbook: ../../utils/create_container_group.yml
@@ -111,8 +132,7 @@
   tasks:
     - name: Skip if slurm upgrade not needed
       ansible.builtin.meta: end_play
-      when:
-        - hostvars['localhost']['slurm_skip'] | default(false) | bool
+      when: hostvars['localhost']['slurm_skip'] | default(false) | bool
 
     - name: Read rollback_manifest.yml
       ansible.builtin.include_vars:
@@ -142,6 +162,7 @@
                     }) | to_nice_yaml }}
             dest: "{{ rollback_manifest_path }}"
             mode: '0644'
+          delegate_to: localhost
 
         - name: End play
           ansible.builtin.meta: end_play
@@ -249,13 +270,13 @@
   tasks:
     - name: Skip if slurm upgrade not needed
       ansible.builtin.meta: end_play
-      when: slurm_skip | default(false) | bool
+      when: hostvars['localhost']['slurm_skip'] | default(false) | bool
 
     - name: Initialize state
       ansible.builtin.set_fact:
         node_status:
           hostname: "{{ inventory_hostname }}"
-          reboot: false # TODO: rename as reboot_failed
+          reboot: false
           ssh: false
           sinfo: false
           unreachable: false
@@ -356,7 +377,7 @@
   tasks:
     - name: Skip if slurm rollback not needed
       ansible.builtin.meta: end_play
-      when: slurm_skip | default(false) | bool
+      when: hostvars['localhost']['slurm_skip'] | default(false) | bool
 
     - name: Set slurm nodes from inventory
       ansible.builtin.set_fact:
diff --git a/rollback/roles/rollback_openchami/tasks/restore_quadlets_and_configs.yml b/rollback/roles/rollback_openchami/tasks/restore_quadlets_and_configs.yml
index 5259a56546..092c7b576e 100644
--- a/rollback/roles/rollback_openchami/tasks/restore_quadlets_and_configs.yml
+++ b/rollback/roles/rollback_openchami/tasks/restore_quadlets_and_configs.yml
@@ -54,7 +54,7 @@
         rollback_oim_host_quadlets: >-
           {{ rollback_oim_host_backup_dir }}/{{ backup_quadlets_subpath }}
 
-    - name: List backed-up quadlet files
+    - name: List backed-up quadlet container files
       ansible.builtin.find:
         paths: "{{ rollback_oim_host_quadlets }}"
         patterns: "*.container"
@@ -63,14 +63,43 @@
       delegate_facts: true
       connection: ssh
 
+    - name: List backed-up quadlet network files
+      ansible.builtin.find:
+        paths: "{{ rollback_oim_host_quadlets }}"
+        patterns: "*.network"
+      register: rollback_network_files
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+      failed_when: false
+
     - name: Display backed-up quadlet files found
       ansible.builtin.debug:
         verbosity: 1
         msg: >-
-          Found {{ rollback_quadlet_files.files | length }} quadlet files in backup:
-          {{ rollback_quadlet_files.files | map(attribute='path') | map('basename') | list }}
+          Found {{ rollback_quadlet_files.files | length }} container files and
+          {{ rollback_network_files.files | default([]) | length }} network files in backup:
+          Containers: {{ rollback_quadlet_files.files | map(attribute='path') | map('basename') | list }}
+          Networks: {{ rollback_network_files.files | default([]) | map(attribute='path') | map('basename') | list }}
+
+    # Restore network files FIRST - these must exist before containers can use them
+    - name: Restore each v2.1 network quadlet file to systemd quadlet directory
+      ansible.builtin.copy:
+        src: "{{ item.path }}"
+        dest: "{{ systemd_quadlet_dir }}/{{ item.path | basename }}"
+        remote_src: true
+        owner: root
+        group: root
+        mode: "{{ file_permissions_644 }}"
+      loop: "{{ rollback_network_files.files | default([]) }}"
+      loop_control:
+        label: "{{ item.path | basename }}"
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+      when: rollback_network_files.files | default([]) | length > 0
 
-    - name: Restore each v2.1 quadlet file to systemd quadlet directory
+    - name: Restore each v2.1 container quadlet file to systemd quadlet directory
       ansible.builtin.copy:
         src: "{{ item.path }}"
         dest: "{{ systemd_quadlet_dir }}/{{ item.path | basename }}"
@@ -216,7 +245,8 @@
       ansible.builtin.debug:
         msg:
           - "{{ rollback_messages.restore.quadlets_success }}"
-          - "Quadlet files restored: {{ rollback_quadlet_files.files | length }}"
+          - "Container quadlet files restored: {{ rollback_quadlet_files.files | length }}"
+          - "Network quadlet files restored: {{ rollback_network_files.files | default([]) | length }}"
           - "v2.2-only quadlets removed: {{ rollback_v22_only_quadlets | join(', ') }}"
           - "openchami.target: restored from backup (references coresmd.service)"
           - "/etc/openchami: {{ 'restored from backup' if rollback_etc_openchami_backup_stat.stat.exists | default(false) else 'backup NOT found' }}"
diff --git a/rollback/roles/rollback_openchami/tasks/start_v21_containers.yml b/rollback/roles/rollback_openchami/tasks/start_v21_containers.yml
index 61e8505fcb..250208cb3a 100644
--- a/rollback/roles/rollback_openchami/tasks/start_v21_containers.yml
+++ b/rollback/roles/rollback_openchami/tasks/start_v21_containers.yml
@@ -30,6 +30,72 @@
 
 - name: Start v2.1 containers
   block:
+    # ── Create Podman networks if they don't exist ──────────────────────
+    # These networks enable DNS resolution between containers.
+    # Without these networks, containers fail with "Could not resolve host" errors.
+    # We create them directly with podman as a fallback if network quadlet
+    # files weren't in the backup or the network services don't exist.
+    # "podman network create --ignore" prints the network name even when the
+    # network already exists, so existence is probed first and "changed" is
+    # reported only for networks that were actually created.
+    - name: Check which OpenCHAMI Podman networks already exist
+      ansible.builtin.command:
+        cmd: podman network exists {{ item }}
+      loop:
+        - ochami-internal
+        - ochami-jwt-internal
+        - ochami-cert-internal
+        - ochami-external
+      register: ochami_net_check
+      changed_when: false
+      failed_when: false
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+
+    - name: Create missing OpenCHAMI Podman networks
+      ansible.builtin.command:
+        cmd: podman network create --ignore {{ item.item }}
+      loop: "{{ ochami_net_check.results | default([]) }}"
+      loop_control:
+        label: "{{ item.item }}"
+      register: ochami_net_create
+      changed_when: ochami_net_create.rc | default(1) == 0
+      failed_when: false
+      when: item.rc | default(1) != 0
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+
+    - name: Display network creation status
+      ansible.builtin.debug:
+        verbosity: 1
+        msg: >-
+          Podman networks ensured: ochami-internal, ochami-jwt-internal,
+          ochami-cert-internal, ochami-external
+
+    # ── Start Podman network services if they exist ─────────────────────
+    # These systemd services may exist from the openchami RPM installation.
+    # failed_when is false so a unit that cannot be started does not abort the rollback.
+    - name: Start OpenCHAMI Podman network services
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: started
+        enabled: true
+      loop:
+        - openchami-internal-network.service
+        - openchami-external-network.service
+        - openchami-cert-internal-network.service
+        - openchami-jwt-internal-network.service
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+      failed_when: false
+
+    - name: Wait for network services to initialize
+      ansible.builtin.pause:
+        seconds: 5
+
     # ── Start all OpenCHAMI services ────────────────────────────────────
     - name: Start openchami.target with v2.1 containers
       ansible.builtin.systemd:
diff --git a/upgrade/playbooks/upgrade_slurm.yml b/upgrade/playbooks/upgrade_slurm.yml
index 62375da04c..086ab2edcf 100644
--- a/upgrade/playbooks/upgrade_slurm.yml
+++ b/upgrade/playbooks/upgrade_slurm.yml
@@ -52,10 +52,6 @@
       when:
         - manifest.component_status[component_name] | default('pending') == 'completed'
 
-    - name: Set slurm upgrade directory
-      ansible.builtin.set_fact:
-        slurm_2_1_backup_dir: "{{ manifest.backup_dir }}"
-
     - name: "Mark as skipped — BuildStream terminal gate active (C-24)"
       ansible.builtin.copy:
         content: >-
@@ -78,50 +74,50 @@
         - hostvars['localhost']['build_stream_terminal'] | default(false) | bool
         - manifest.component_status.build_stream | default('pending') == 'completed'
 
-    - name: Set slurm upgrade status to in-progress
-      ansible.builtin.copy:
-        content: >-
-          {{ manifest | combine({
-               'component_status': manifest.component_status | combine({
-                 component_name: 'in-progress'
-               })
-             }) | to_nice_yaml }}
-        dest: "{{ manifest_path }}"
-        mode: '0644'
-      when: not slurm_skip
+    - name: "Skip — BuildStream terminal gate active (C-24)"
+      ansible.builtin.meta: end_play  # stops this play here; the tasks below are not run
+      when:
+        - hostvars['localhost']['build_stream_terminal'] | default(false) | bool  # NOTE(review): the preceding "when" also requires build_stream == 'completed'; this gate does not — confirm intended
 
-    - name: "Display upgrade status in-progress — {{ component_name }}"
-      ansible.builtin.debug:
-        msg: "[UPGRADE] Component '{{ component_name }}' — status changed to: in-progress"
+    - name: Record in-progress status and load prior reboot state (slurm upgrade not skipped)
       when: not slurm_skip
+      block:  # one shared "not slurm_skip" guard replaces the per-task conditions removed below
+        - name: Set slurm upgrade status to in-progress
+          ansible.builtin.copy:
+            content: >-
+              {{ manifest | combine({
+                  'component_status': manifest.component_status | combine({
+                    component_name: 'in-progress'
+                  })
+                }) | to_nice_yaml }}
+            dest: "{{ manifest_path }}"
+            mode: '0644'
 
-    - name: Check for existing reboot state file
-      ansible.builtin.stat:
-        path: /opt/omnia/.data/slurm_upgrade_reboot_state.yml
-      register: _reboot_state_stat
-      when: not slurm_skip
+        - name: "Display upgrade status in-progress — {{ component_name }}"
+          ansible.builtin.debug:
+            msg: "[UPGRADE] Component '{{ component_name }}' — status changed to: in-progress"
 
-    - name: Load reboot state from previous run
-      ansible.builtin.include_vars:
-        file: /opt/omnia/.data/slurm_upgrade_reboot_state.yml
-        name: _reboot_state
-      when:
-        - not slurm_skip
-        - _reboot_state_stat.stat.exists | default(false)
+        - name: Check for existing reboot state file
+          ansible.builtin.stat:
+            path: /opt/omnia/.data/slurm_upgrade_reboot_state.yml
+          register: _reboot_state_stat
 
-    - name: Set previously successful reboot list
-      ansible.builtin.set_fact:
-        slurm_previously_rebooted: "{{ _reboot_state.successfully_rebooted | default([]) }}"
-      when:
-        - not slurm_skip
-        - _reboot_state_stat.stat.exists | default(false)
+        - name: Load reboot state from previous run
+          ansible.builtin.include_vars:
+            file: /opt/omnia/.data/slurm_upgrade_reboot_state.yml
+            name: _reboot_state
+          when:
+            - _reboot_state_stat.stat.exists | default(false)
 
-    - name: Initialize previously rebooted list (no prior state)
-      ansible.builtin.set_fact:
-        slurm_previously_rebooted: []
-      when:
-        - not slurm_skip
-        - not (_reboot_state_stat.stat.exists | default(false))
+        - name: Set previously successful reboot list
+          ansible.builtin.set_fact:
+            slurm_previously_rebooted: "{{ _reboot_state.successfully_rebooted | default([]) }}"
+          when: _reboot_state_stat.stat.exists | default(false)
+
+        - name: Initialize previously rebooted list (no prior state)
+          ansible.builtin.set_fact:
+            slurm_previously_rebooted: []
+          when: not (_reboot_state_stat.stat.exists | default(false))
 
 # ============================================================================
 # Create OIM host group (needed for cloud-init/BSS update on OIM)
@@ -181,6 +177,7 @@
                     }) | to_nice_yaml }}
             dest: "{{ manifest_path }}"
             mode: '0644'
+          delegate_to: localhost
 
         - name: End play
           ansible.builtin.meta: end_play
@@ -216,23 +213,19 @@
       when: slurm_host_group_map | default({}) | length == 0
 
     - name: SLURM UPGRADE WARNING
-      ansible.builtin.debug:
-        msg: "{{ slurm_upgrade_banner }}"
-      vars:
-        slurm_upgrade_banner:
-          - "[UPGRADE] SLURM CLUSTER — PRE-UPGRADE NOTICE"
-          - ""
-          - "1. NODE REBOOT    — All Slurm/login nodes will reboot. Ensure no critical jobs are running."
-          - "2. PXE MAPPING    — Do not modify Slurm node entries until upgrade completes."
-          - "3. NFS MOUNTS     — Omnia 2.1 mount points are preserved. Do not modify during upgrade."
-          - "4. VAST STORAGE   — Vast storage is not supported during upgrade. Please remove it from omnia_config.yml."
-          - "5. ROLLBACK SCOPE — New NFS mounts added during upgrade will NOT be retained on rollback."
-          - "6. POST-UPGRADE   — Rollback is NOT recommended once all nodes boot with cloud-init complete."
-
-    - name: Pause to display warning
       ansible.builtin.pause:
         seconds: 10
-        prompt: "SLURM UPGRADE - Proceeding in 10 seconds..."
+        prompt: "{{ slurm_upgrade_banner }}"  # the notice is now the pause prompt; the separate debug task is removed
+      vars:
+        slurm_upgrade_banner: |  # literal block scalar: one notice item per line
+          [UPGRADE] SLURM CLUSTER — PRE-UPGRADE NOTICE
+          ============================================
+          1. NODE REBOOT    — All Slurm/login nodes will reboot. Ensure no critical jobs are running.
+          2. PXE MAPPING    — Do not modify Slurm node entries until upgrade completes.
+          3. NFS MOUNTS     — Omnia 2.1 mount points are preserved. Do not modify during upgrade.
+          4. VAST STORAGE   — Vast storage is not supported during upgrade. Please remove it from omnia_config.yml.
+          5. ROLLBACK SCOPE — New NFS mounts added during upgrade will NOT be retained on rollback.
+          6. POST-UPGRADE   — Rollback is NOT recommended once all nodes boot with cloud-init complete.
 
     - name: Read oim_metadata for oim_node_name (standalone fallback)
       ansible.builtin.include_vars:
@@ -287,13 +280,13 @@
   tasks:
     - name: Skip if slurm upgrade not needed
       ansible.builtin.meta: end_play
-      when: slurm_skip | default(false) | bool
+      when: hostvars['localhost']['slurm_skip'] | default(false) | bool
 
     - name: Initialize state
       ansible.builtin.set_fact:
         node_status:
           hostname: "{{ inventory_hostname }}"
-          reboot: false # TODO: rename as reboot_failed
+          reboot: false
           ssh: false
           sinfo: false
           unreachable: false
@@ -433,7 +426,7 @@
   tasks:
     - name: Skip if slurm upgrade not needed
       ansible.builtin.meta: end_play
-      when: slurm_skip | default(false) | bool
+      when: hostvars['localhost']['slurm_skip'] | default(false) | bool
 
     - name: Set slurm nodes from inventory
       ansible.builtin.set_fact:
diff --git a/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml b/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml
index a78f0de5d3..5bf72a106d 100644
--- a/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml
+++ b/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml
@@ -142,6 +142,10 @@
         ome_password: "{{ credentials_dict.ome_password | default('') }}"
         ufm_username: "{{ credentials_dict.ufm_username | default('') }}"
         ufm_password: "{{ credentials_dict.ufm_password | default('') }}"
+        vast_username: "{{ credentials_dict.vast_username | default('') }}"
+        vast_password: "{{ credentials_dict.vast_password | default('') }}"
+        postgres_user: "{{ credentials_dict.postgres_user | default('') }}"
+        postgres_password: "{{ credentials_dict.postgres_password | default('') }}"
       no_log: true
 
     - name: Write updated content using template
diff --git a/upgrade/roles/import_input_parameters/tasks/transform_powerscale_values.yml b/upgrade/roles/import_input_parameters/tasks/transform_powerscale_values.yml
index ac2477aecf..d1aeb4e253 100644
--- a/upgrade/roles/import_input_parameters/tasks/transform_powerscale_values.yml
+++ b/upgrade/roles/import_input_parameters/tasks/transform_powerscale_values.yml
@@ -121,19 +121,46 @@
              | select('ne', '')
              | first | default('') }}
 
+    - name: Fetch PowerScale secret_path from backup omnia_config
+      ansible.builtin.set_fact:
+        powerscale_secret_path: >-  # first non-empty csi_powerscale_driver_secret_file_path, or '' when none is set
+          {{ backup_omnia_config.service_k8s_cluster
+             | selectattr('csi_powerscale_driver_secret_file_path', 'defined')
+             | map(attribute='csi_powerscale_driver_secret_file_path')
+             | select('ne', '')
+             | first | default('') }}
+
     - name: Display PowerScale values_path from backup omnia_config
       ansible.builtin.debug:
         msg: "PowerScale values_path from backup omnia_config: {{ powerscale_values_path }}"
 
+    - name: Display PowerScale secret_path from backup omnia_config
+      ansible.builtin.debug:
+        msg: "PowerScale secret_path from backup omnia_config: {{ powerscale_secret_path }}"
+
     - name: Extract values file name from backup omnia_config powerscale_values_path
       ansible.builtin.set_fact:
         powerscale_values_filename: "{{ powerscale_values_path | basename | default('values.yaml') }}"
       when: powerscale_values_path | length > 0
 
+    - name: Extract secret file name from backup omnia_config powerscale_secret_path
+      ansible.builtin.set_fact:
+        powerscale_secret_filename: "{{ powerscale_secret_path | basename | default('secret.yaml', true) }}"  # true: also fall back when basename is empty (path ends with "/")
+      when: powerscale_secret_path | length > 0
+
+    - name: Set default secret filename if path not configured
+      ansible.builtin.set_fact:
+        powerscale_secret_filename: "secret.yaml"
+      when: powerscale_secret_path | default('') | length == 0  # complements the task above, so the fact is always set
+
     - name: Display extracted PowerScale values file name
       ansible.builtin.debug:
         msg: "PowerScale values file name from backup omnia_config: {{ powerscale_values_filename }}"
 
+    - name: Display extracted PowerScale secret file name
+      ansible.builtin.debug:
+        msg: "PowerScale secret file name from backup omnia_config: {{ powerscale_secret_filename }}"
+
     - name: Build dynamic GitHub URL for target version values.yaml
       ansible.builtin.set_fact:
         powerscale_target_values_url: "{{ powerscale_values_github_url_template | replace('{version}', powerscale_v22_version | regex_replace('^v', '')) }}"
@@ -170,12 +197,17 @@
         msg: "{{ merge_values_result.stderr_lines | default([]) }}"
       when: merge_values_result.stderr_lines | default([]) | length > 0
 
-    - name: Copy secret.yaml from v2.1 backup
+    - name: Check if v2.1 secret file exists in backup
+      ansible.builtin.stat:
+        path: "{{ backup_location }}/{{ powerscale_secret_filename }}"
+      register: v21_secret_stat  # consumed by the "when" of the copy task below
+
+    - name: Copy secret file from v2.1 backup
       ansible.builtin.copy:
-        src: "{{ backup_location }}/secret.yaml"
-        dest: "{{ input_project_dir }}/secret.yaml"
+        src: "{{ backup_location }}/{{ powerscale_secret_filename }}"
+        dest: "{{ input_project_dir }}/{{ powerscale_secret_filename }}"
         mode: "0600"
-      when: v21_values_stat.stat.exists
+      when: v21_secret_stat.stat.exists  # gate on the secret file itself rather than on the values file
 
     - name: Display PowerScale values.yaml transformation summary
       ansible.builtin.debug:
diff --git a/upgrade/roles/import_input_parameters/templates/omnia_config_credentials.yml.j2 b/upgrade/roles/import_input_parameters/templates/omnia_config_credentials.yml.j2
index 9ca40114aa..80699f1cc2 100644
--- a/upgrade/roles/import_input_parameters/templates/omnia_config_credentials.yml.j2
+++ b/upgrade/roles/import_input_parameters/templates/omnia_config_credentials.yml.j2
@@ -58,3 +58,7 @@ ome_password: "{{ ome_password | default('') }}"
 # UFM telemetry credentials
 ufm_username: "{{ ufm_username | default('') }}"
 ufm_password: "{{ ufm_password | default('') }}"
+
+# VAST telemetry credentials
+vast_username: "{{ vast_username | default('') }}"
+vast_password: "{{ vast_password | default('') }}"
diff --git a/upgrade/roles/import_input_parameters/vars/main.yml b/upgrade/roles/import_input_parameters/vars/main.yml
index 3bac72ac30..4423147336 100644
--- a/upgrade/roles/import_input_parameters/vars/main.yml
+++ b/upgrade/roles/import_input_parameters/vars/main.yml
@@ -353,13 +353,13 @@ msg_powerscale_v22_version_missing: |
   Please check the software_config.json file.
 msg_powerscale_values_transform_summary: |
   PowerScale CSI driver values.yaml transformed: {{ powerscale_v21_version }} to {{ powerscale_v22_version }}.
-  Backup preserved at: {{ backup_location }}/values.yaml
-  Target: {{ input_project_dir }}/values.yaml
+  Backup preserved at: {{ backup_location }}/{{ powerscale_values_filename | default('values.yaml') }}
+  Target: {{ input_project_dir }}/{{ powerscale_values_filename | default('values.yaml') }}
   Changes:
   - Downloaded {{ powerscale_v22_version }} values.yaml template from GitHub
   - Preserved v2.1 settings: isiPath, isiAccessZone, controllerCount, custom configurations
   - Updated to {{ powerscale_v22_version }} structure with new parameters
-  Secret file copied: {{ input_project_dir }}/secret.yaml
+  Secret file copied: {{ input_project_dir }}/{{ powerscale_secret_filename | default('secret.yaml') }}
 
 # PowerScale GitHub URL template for values.yaml
 powerscale_values_github_url_template: "https://raw.githubusercontent.com/dell/helm-charts/csi-isilon-{version}/charts/csi-isilon/values.yaml"
diff --git a/upgrade/roles/upgrade_k8s/tasks/load_version_vars.yml b/upgrade/roles/upgrade_k8s/tasks/load_version_vars.yml
index d930690682..33b51ae6b6 100644
--- a/upgrade/roles/upgrade_k8s/tasks/load_version_vars.yml
+++ b/upgrade/roles/upgrade_k8s/tasks/load_version_vars.yml
@@ -126,8 +126,3 @@
          | selectattr('type', 'equalto', 'tarball')
          | selectattr('package', 'search', 'helm')
          | map(attribute='package') | join }}
-
-# ── Set OIM host ───────────────────────────────────────────────────
-- name: Set oim_host to NFS server IP
-  ansible.builtin.set_fact:
-    oim_host: "{{ k8s_nfs_server_ip }}"
diff --git a/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml b/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml
index e646f78066..2e0889e048 100644
--- a/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml
+++ b/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml
@@ -92,13 +92,13 @@
       ansible.builtin.file:
         path: "{{ openchami_backup_dir }}/openchami/postgresql_backup"
         state: directory
-        mode: "{{ dir_permissions_755 }}"
+        mode: "{{ dir_permissions_700 }}"
 
     - name: Create PostgreSQL backup directory (OIM host — shared path)
       ansible.builtin.file:
         path: "{{ oim_host_backup_dir }}/openchami/postgresql_backup"
         state: directory
-        mode: "{{ dir_permissions_755 }}"
+        mode: "{{ dir_permissions_700 }}"
       delegate_to: oim
       delegate_facts: true
       connection: ssh
@@ -181,6 +181,15 @@
       delegate_facts: true
       connection: ssh
 
+    - name: Set PostgreSQL backup file permissions to 0600
+      ansible.builtin.file:
+        path: "{{ oim_host_backup_dir }}/openchami/postgresql_backup/openchami.sql"
+        mode: "{{ file_permissions_600 }}"  # shared role variable, same as the failure-marker task
+      when: pgdump_result.rc | default(1) == 0  # default(1) mirrors the failure-marker task; a missing rc counts as failure
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+
     - name: Create empty backup marker if pg_dump failed
       ansible.builtin.copy:
         content: |
@@ -188,7 +197,7 @@
           -- Database may be empty (prepare_oim-only scenario)
           -- stderr: {{ pgdump_result.stderr | default('') | trim }}
         dest: "{{ openchami_backup_dir }}/openchami/postgresql_backup/openchami.sql"
-        mode: "{{ file_permissions_644 }}"
+        mode: "{{ file_permissions_600 }}"
       when: pgdump_result.rc | default(1) != 0
 
     - name: Display pg_dump warning if it failed
@@ -232,7 +241,7 @@
       ansible.builtin.file:
         path: "{{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }}"
         state: directory
-        mode: "{{ dir_permissions_755 }}"
+        mode: "{{ dir_permissions_700 }}"
       delegate_to: oim
       delegate_facts: true
       connection: ssh
@@ -249,6 +258,8 @@
       ansible.builtin.shell: |
         set -o pipefail
-        cp -a {{ openchami_etc_dir }}/. {{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }}/
+        cp -a {{ openchami_etc_dir }}/. {{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }}/ || exit $?  # keep cp's status; later commands must not mask a failed copy
+        find {{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }} -type f -exec chmod 0600 {} + || exit $?  # regular files only, dotfiles included
+        find {{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }} -type d -exec chmod 0700 {} +  # directories keep the search bit
       register: etc_openchami_backup_result
       changed_when: etc_openchami_backup_result.rc == 0
       when: etc_openchami_stat.stat.exists | default(false)
@@ -318,7 +329,7 @@
             path: "{{ backup_etc_openchami_subpath }}"
             present: {{ etc_openchami_backup_stat.stat.exists | default(false) }}
         dest: "{{ openchami_backup_dir }}/openchami_backup_manifest.yml"
-        mode: "{{ file_permissions_644 }}"
+        mode: "{{ file_permissions_600 }}"
 
     - name: Display backup completion summary
       ansible.builtin.debug:
diff --git a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml
index e2ff8bd7fa..9430b711ac 100644
--- a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml
+++ b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml
@@ -525,6 +525,60 @@
       connection: ssh
 
     # --- 9. Start services and recover any failed services ---
+    # Create Podman networks if they don't exist - these enable DNS resolution
+    # between containers (e.g., hydra, postgres hostname resolution).
+    # "podman network create --ignore" prints the network name even when the
+    # network already exists, so existence is probed first and "changed" is
+    # reported only for networks that were actually created.
+    - name: Check which OpenCHAMI Podman networks already exist
+      ansible.builtin.command:
+        cmd: podman network exists {{ item }}
+      loop:
+        - ochami-internal
+        - ochami-jwt-internal
+        - ochami-cert-internal
+        - ochami-external
+      register: ochami_net_check
+      changed_when: false
+      failed_when: false
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+
+    - name: Create missing OpenCHAMI Podman networks
+      ansible.builtin.command:
+        cmd: podman network create --ignore {{ item.item }}
+      loop: "{{ ochami_net_check.results | default([]) }}"
+      loop_control:
+        label: "{{ item.item }}"
+      register: ochami_net_create
+      changed_when: ochami_net_create.rc | default(1) == 0
+      failed_when: false
+      when: item.rc | default(1) != 0
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+
+    # Start Podman network services if they exist (from openchami RPM)
+    - name: Start OpenCHAMI Podman network services
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: started
+        enabled: true
+      loop:
+        - openchami-internal-network.service
+        - openchami-external-network.service
+        - openchami-cert-internal-network.service
+        - openchami-jwt-internal-network.service
+      delegate_to: oim
+      delegate_facts: true
+      connection: ssh
+      failed_when: false
+
+    - name: Wait for network services to initialize
+      ansible.builtin.pause:
+        seconds: 5
+
     - name: Start OpenCHAMI services with new images
       ansible.builtin.systemd:
         name: openchami.target
diff --git a/upgrade/roles/upgrade_openchami/vars/main.yml b/upgrade/roles/upgrade_openchami/vars/main.yml
index 5f0b9bca95..b122cae2de 100644
--- a/upgrade/roles/upgrade_openchami/vars/main.yml
+++ b/upgrade/roles/upgrade_openchami/vars/main.yml
@@ -14,7 +14,9 @@
 ---
 
 # File permissions
+dir_permissions_700: "0700"
 dir_permissions_755: "0755"
+file_permissions_600: "0600"
 file_permissions_644: "0644"
 
 # Manifest path for upgrade state tracking
diff --git a/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml b/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml
index d051385353..9655f55130 100644
--- a/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml
+++ b/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml
@@ -60,6 +60,7 @@
   when:
     - omnia_config is defined
     - omnia_config.service_k8s_cluster is defined
+    - omnia_config.service_k8s_cluster | length > 0
   tags: always
 
 - name: Set k8s_client_mount_path
@@ -70,13 +71,22 @@
           | first).mount_point }}
   when:
     - storage_config is defined
+    - storage_config.mounts is defined
     - k8s_nfs_storage_name is defined
+    - storage_config.mounts | selectattr('name', 'equalto', k8s_nfs_storage_name) | list | length > 0
   tags: always
+
 # ── Load high_availability_config.yml ──
+- name: Check if high_availability_config.yml exists
+  ansible.builtin.stat:
+    path: "{{ input_project_dir }}/high_availability_config.yml"
+  register: ha_config_stat  # consumed by the "when" of the include_vars task below
+
 - name: Read high_availability_config.yml for kube_vip
   ansible.builtin.include_vars:
     file: "{{ input_project_dir }}/high_availability_config.yml"
     name: ha_config
+  when: ha_config_stat.stat.exists  # skipped when the file is absent, so this task does not create ha_config
 
 - name: Debug high_availability_config.yml content
   ansible.builtin.debug:
@@ -90,6 +100,7 @@
     kube_vip: "{{ ha_config.service_k8s_cluster_ha[0].virtual_ip_address | default('') }}"
     cacheable: true
   when:
+    - ha_config is defined
     - ha_config.service_k8s_cluster_ha is defined
     - ha_config.service_k8s_cluster_ha | length > 0
 
diff --git a/upgrade/roles/upgrade_telemetry/tasks/main.yml b/upgrade/roles/upgrade_telemetry/tasks/main.yml
index ee5fd1d282..68c087306c 100644
--- a/upgrade/roles/upgrade_telemetry/tasks/main.yml
+++ b/upgrade/roles/upgrade_telemetry/tasks/main.yml
@@ -54,9 +54,21 @@
 # ── Phase 3: Execute telemetry.sh to redeploy telemetry stack ──
 - name: Phase 3 - Execute telemetry.sh to redeploy telemetry stack
   ansible.builtin.include_tasks: execute_telemetry_sh.yml
+  when:
+    - k8s_client_mount_path is defined
+    - kube_vip is defined
+    - kube_vip | length > 0
+
+- name: Skip telemetry.sh (k8s not configured)
+  ansible.builtin.debug:
+    msg: "Skipping telemetry.sh execution — service_k8s not configured (Slurm-only deployment)."
+  when: k8s_client_mount_path is not defined or kube_vip | default('') | length == 0  # exact inverse of the Phase 3 condition, including an empty kube_vip
 
 # ── Phase 4: Verify all telemetry pods and set upgrade status ──
 - name: Phase 4 - Verify all telemetry pods and set upgrade status
+  when:
+    - kube_vip is defined
+    - kube_vip | length > 0
   block:
     - name: Get all telemetry pods status
       ansible.builtin.shell:
diff --git a/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml b/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml
index 847ad36af4..e99d7bf80c 100644
--- a/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml
+++ b/upgrade/roles/upgrade_telemetry/tasks/migrate_statefulset.yml
@@ -168,3 +168,43 @@
       when: orphaned_pods.stdout_lines | default([]) | length > 0
       delegate_to: "{{ kube_vip }}"
       connection: ssh
+
+    # ── Cleanup old pre-operator services and deployments ──
+    # The operator creates new services with different names (e.g. vminsert-victoria-cluster),
+    # so the old standalone services become stale and waste LoadBalancer IPs.
+    - name: Find old pre-operator services
+      ansible.builtin.shell: |
+        set -o pipefail
+        kubectl -n {{ telemetry_namespace }} get svc --no-headers 2>/dev/null \
+          | awk '{print $1}' \
+          | grep -xE 'vminsert|vmselect|vmstorage|vmagent' || true
+      register: old_services  # stdout_lines holds the whole-line name matches, if any
+      changed_when: false  # read-only query
+      failed_when: false
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    - name: Delete old pre-operator services
+      ansible.builtin.command:
+        cmd: kubectl -n {{ telemetry_namespace }} delete svc {{ item }} --timeout=30s
+      loop: "{{ old_services.stdout_lines | default([]) | select() | list }}"  # select() drops empty entries
+      changed_when: true
+      failed_when: false  # best effort: a failed delete does not stop the migration
+      when: old_services.stdout_lines | default([]) | select() | list | length > 0  # explicit guard; an empty loop would also run zero iterations
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    - name: Delete old vmagent deployment (replaced by operator-managed VMAgent)
+      ansible.builtin.command:
+        cmd: kubectl -n {{ telemetry_namespace }} delete deployment {{ old_vmagent_deployment }} --ignore-not-found --timeout=60s
+      register: old_vmagent_delete  # --ignore-not-found exits 0 with no output when the deployment is absent
+      changed_when: "'deleted' in (old_vmagent_delete.stdout | default(''))"
+      failed_when: false
+      delegate_to: "{{ kube_vip }}"
+      connection: ssh
+
+    - name: Display old resource cleanup summary
+      ansible.builtin.debug:
+        msg:
+          - "Old services deleted: {{ old_services.stdout_lines | default([]) | select() | list }}"
+          - "Old vmagent deployment cleanup attempted: {{ old_vmagent_deployment }}"
diff --git a/upgrade/roles/upgrade_telemetry/tasks/patch_idrac_termination_grace_period.yml b/upgrade/roles/upgrade_telemetry/tasks/patch_idrac_termination_grace_period.yml
index 6116afb37f..76755b45b9 100644
--- a/upgrade/roles/upgrade_telemetry/tasks/patch_idrac_termination_grace_period.yml
+++ b/upgrade/roles/upgrade_telemetry/tasks/patch_idrac_termination_grace_period.yml
@@ -48,7 +48,7 @@
     msg: "idrac-telemetry current replica count: {{ idrac_replica_count.stdout }}"
   when: idrac_sts_check.rc == 0
 
-- name: Patch terminationGracePeriodSeconds to 120s for graceful MySQL shutdown
+- name: Patch terminationGracePeriodSeconds for graceful MySQL shutdown
   ansible.builtin.command:
     cmd: >
       kubectl patch statefulset idrac-telemetry -n {{ telemetry_namespace }}
diff --git a/upgrade/roles/upgrade_telemetry/tasks/upgrade_operator.yml b/upgrade/roles/upgrade_telemetry/tasks/upgrade_operator.yml
index 4fa40ba520..40cd32336a 100644
--- a/upgrade/roles/upgrade_telemetry/tasks/upgrade_operator.yml
+++ b/upgrade/roles/upgrade_telemetry/tasks/upgrade_operator.yml
@@ -16,15 +16,29 @@
 # Install / upgrade VictoriaMetrics operator via Helm
 # ============================================================================
 
+- name: Remove finalizers from VictoriaMetrics CRDs (prevents delete hang)
+  ansible.builtin.shell: |
+    set -o pipefail
+    for crd in $(kubectl get crd 2>/dev/null | grep victoriametrics | awk '{print $1}'); do
+      kubectl patch crd "$crd" --type=merge -p '{"metadata":{"finalizers":[]}}' 2>/dev/null || true
+    done
+  changed_when: false  # always reported as unchanged, even when a CRD was patched
+  failed_when: false
+  delegate_to: "{{ kube_vip }}"
+  connection: ssh
+
 - name: Delete existing VictoriaMetrics CRDs (to fix Helm ownership issues)
   ansible.builtin.shell: |
     set -o pipefail
-    kubectl get crd | grep victoriametrics | awk '{print $1}' | xargs kubectl delete crd 2>/dev/null || true
+    for crd in $(kubectl get crd 2>/dev/null | grep victoriametrics | awk '{print $1}'); do
+      timeout 30 kubectl delete crd "$crd" --timeout=30s 2>/dev/null || true
+    done
   register: crd_delete_result
   changed_when: true
   failed_when: false
   delegate_to: "{{ kube_vip }}"
   connection: ssh
+  timeout: 120  # NOTE(review): deletes run serially at up to 30s each, so more than four slow deletes would exceed this task timeout — confirm
 
 - name: Install VictoriaMetrics operator from tarball
   ansible.builtin.command:
diff --git a/utils/credential_utility/roles/create_config/templates/omnia_credential.j2 b/utils/credential_utility/roles/create_config/templates/omnia_credential.j2
index d9ffd938b4..f1ae1091f6 100644
--- a/utils/credential_utility/roles/create_config/templates/omnia_credential.j2
+++ b/utils/credential_utility/roles/create_config/templates/omnia_credential.j2
@@ -58,3 +58,7 @@ ome_password: ""
 # UFM telemetry credentials
 ufm_username: ""
 ufm_password: ""
+
+# VAST telemetry credentials
+vast_username: ""
+vast_password: ""