From 8bfc0d2ac9be8f7933200f6fc1b45cbeebd1376c Mon Sep 17 00:00:00 2001
From: Simran <sgill@redhat.com>
Date: Fri, 26 Jun 2026 11:59:34 -0700
Subject: [PATCH] Add spoke cluster pre/post-upgrade OCP health check steps for
 ACM interop

---
 .../spoke-upgrade-healthcheck/OWNERS          |   3 +
 .../spoke-upgrade-healthcheck/README.md       |  57 +++
 ...-p2p-spoke-upgrade-healthcheck-commands.sh | 471 ++++++++++++++++++
 ...poke-upgrade-healthcheck-ref.metadata.json |  11 +
 ...rop-p2p-spoke-upgrade-healthcheck-ref.yaml |  37 ++
 .../spoke-upgrade-prehealthcheck/OWNERS       |   3 +
 .../spoke-upgrade-prehealthcheck/README.md    |  31 ++
 ...p-spoke-upgrade-prehealthcheck-commands.sh | 454 +++++++++++++++++
 ...e-upgrade-prehealthcheck-ref.metadata.json |  11 +
 ...-p2p-spoke-upgrade-prehealthcheck-ref.yaml |  28 ++
 10 files changed, 1106 insertions(+)
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/OWNERS
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/README.md
 create mode 100755 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-commands.sh
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.metadata.json
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.yaml
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/OWNERS
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/README.md
 create mode 100755 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-commands.sh
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.metadata.json
 create mode 100644 ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.yaml

diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/OWNERS b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/OWNERS
new file mode 100644
index 0000000000000..a95e6bb67979e
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/OWNERS
@@ -0,0 +1,3 @@
+approvers:  &owners
+- cspi-qe-ocp-lp
+reviewers: *owners
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/README.md b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/README.md
new file mode 100644
index 0000000000000..a6ce494f02b05
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/README.md
@@ -0,0 +1,57 @@
+# acm-interop-p2p-spoke-upgrade-healthcheck
+
+Post-upgrade health check for the **ACM managed spoke** after `acm-interop-p2p-spoke-upgrade`.
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `acm-interop-p2p-spoke-upgrade-healthcheck-ref.yaml` | Step registry ref (verification-tests image, same as cucushift) |
+| `acm-interop-p2p-spoke-upgrade-healthcheck-commands.sh` | Sets spoke `KUBECONFIG`, then runs cucushift upgrade healthcheck logic |
+
+The commands script embeds an adapted copy of the body of
+[`cucushift-upgrade-healthcheck-commands.sh`](../../../cucushift/upgrade/healthcheck/cucushift-upgrade-healthcheck-commands.sh)
+(with spoke-specific MCP and CO wait budgets); the upstream file itself is not modified. When updating
+health check behavior, change cucushift first, then refresh the embedded section in `acm-interop-p2p-spoke-upgrade-healthcheck-commands.sh`.
+
+## Checks
+
+1. MachineConfigPools — not Updating/Degraded, stable for 5 minutes (wait budget `max(ACM_SPOKE_MCP_READY_TIMEOUT_MINUTES, nodes × ACM_SPOKE_MCP_MINUTES_PER_NODE)`; defaults 210m / 35m per node)
+2. Cluster operators — Available, not Progressing, not Degraded (`ACM_SPOKE_CO_READY_TIMEOUT_MINUTES`, default 45m)
+3. Nodes — all Ready
+4. Pods — status dump for reference
+
+## Step timeout
+
+| Setting | Default | Purpose |
+|---------|---------|---------|
+| `timeout` (ref.yaml) | 5h | MCP + CO budgets + margin (fits within 20h job) |
+| `ACM_SPOKE_MCP_READY_TIMEOUT_MINUTES` | 210 | MCP wait floor |
+| `ACM_SPOKE_MCP_MINUTES_PER_NODE` | 35 | MCP wait per node |
+| `ACM_SPOKE_CO_READY_TIMEOUT_MINUTES` | 45 | CO stability poll budget |
+| `grace_period` | 10m | EXIT trap diagnostics after step failure |
+
+## Requirements
+
+| File | Source |
+|------|--------|
+| `${SHARED_DIR}/kubeconfig` | Hub cluster (ipi-install) |
+| Spoke admin kubeconfig | `${SHARED_DIR}/managed-cluster-kubeconfig` when `${SHARED_DIR}/managed-cluster-name` matches the spoke name, otherwise the Hive `ClusterDeployment` admin kubeconfig secret of each spoke |
+
+Optional env `ACM_INTEROP_P2P__HEALTHCHECK__SPOKE_CLUSTERS` (comma-separated `ManagedCluster` names) limits which spokes are checked.
+When unset or empty, all `ManagedCluster` resources except `local-cluster` are checked.
+
+## Artifacts on failure
+
+| File | Content |
+|------|---------|
+| `spoke-<name>-upgrade-healthcheck-failure.txt` | ClusterVersion, MCP describe, not-Ready node describe, unhealthy CO describe, MCO pods |
+
+## Typical workflow placement
+
+```yaml
+test:
+- ref: acm-interop-p2p-spoke-upgrade
+- ref: acm-interop-p2p-spoke-upgrade-healthcheck
+- ref: interop-tests-openshift-virtualization-upgrade-tests
+```
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-commands.sh b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-commands.sh
new file mode 100755
index 0000000000000..41ea11084aaaf
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-commands.sh
@@ -0,0 +1,471 @@
+#!/bin/bash
+#
+# Post-upgrade health check on ACM managed spoke cluster(s).
+# Health check logic mirrors cucushift-upgrade-healthcheck-commands.sh
+# (ci-operator/step-registry/cucushift/upgrade/healthcheck/) with spoke-specific MCP wait tuning.
+#
+set -euxo pipefail; shopt -s inherit_errexit
+# Script-wide state shared by the functions below.
+typeset hubKubeconfig="${SHARED_DIR}/kubeconfig"  # hub credentials; re-exported before each spoke lookup
+typeset spokeName='spoke'  # spoke currently being checked; part of the failure artifact file name
+typeset -a spokeNamesArr=()  # spokes selected for checking
+typeset -a failedSpokesArr=()  # spokes whose kubeconfig extraction or health check failed
+# The hub kubeconfig is mandatory; abort early when it is missing.
+[[ -f "${hubKubeconfig}" ]] || {
+    echo "[ERROR] Hub kubeconfig not found: ${hubKubeconfig}" >&2
+    exit 1
+}
+
+WriteSpokeHealthcheckFailureDiagnostics() {  # Dump the current cluster's state into a per-spoke artifact file.
+    typeset artifactFile="${ARTIFACT_DIR}/spoke-${spokeName}-upgrade-healthcheck-failure.txt"
+    typeset unhealthyMcp mcpName nodeName coName
+    # Every oc call is best-effort (`|| true`) so one failing query does not cut the report short.
+    {
+        echo "=== oc get clusterversion ==="
+        oc get clusterversion version -o wide 2>&1 || true
+        echo
+        echo "=== oc describe clusterversion version ==="
+        oc describe clusterversion version 2>&1 || true
+        echo
+        echo "=== oc get machineconfigpools ==="
+        oc get machineconfigpools 2>&1 || true
+        echo
+        echo "=== MCP custom-columns (UPDATING/DEGRADED) ==="
+        oc get machineconfigpools \
+            -o 'custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?(@.type=="Updating")].status,DEGRADED:status.conditions[?(@.type=="Degraded")].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount' \
+            2>&1 || true
+        unhealthyMcp="$(oc get machineconfigpools \
+            -o 'custom-columns=NAME:metadata.name,UPDATING:status.conditions[?(@.type=="Updating")].status,DEGRADED:status.conditions[?(@.type=="Degraded")].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount' \
+            --no-headers 2>/dev/null | grep -Ev '[[:space:]]False[[:space:]]+False[[:space:]]+0[[:space:]]*$' || true)"  # keep rows that are not "False False 0"
+        if [[ -n "${unhealthyMcp}" ]]; then
+            echo
+            echo "=== oc describe unhealthy MCPs ==="
+            while read -r mcpName _; do
+                [[ -n "${mcpName}" ]] || continue
+                echo "--- ${mcpName} ---"
+                oc describe machineconfigpool "${mcpName}" 2>&1 || true
+            done <<<"${unhealthyMcp}"
+        fi
+        echo
+        echo "=== oc get nodes ==="
+        oc get nodes -o wide 2>&1 || true
+        echo
+        echo "=== oc describe not-Ready nodes ==="
+        while read -r nodeName _; do
+            [[ -n "${nodeName}" ]] || continue
+            echo "--- ${nodeName} ---"
+            oc describe node "${nodeName}" 2>&1 || true
+        done < <(oc get nodes --no-headers 2>/dev/null | awk '$2 != "Ready" {print $1}' || true)  # second column is not exactly "Ready"
+        echo
+        echo "=== oc get clusteroperators ==="
+        oc get clusteroperators 2>&1 || true
+        echo
+        echo "=== oc describe unhealthy clusteroperators ==="
+        while read -r coName _; do
+            [[ -n "${coName}" ]] || continue
+            echo "--- ${coName} ---"
+            oc describe clusteroperator "${coName}" 2>&1 || true
+        done < <(oc get clusteroperators --no-headers 2>/dev/null | awk '$3 == "False" || $4 == "True" || $5 == "True" {print $1}' || true)  # AVAILABLE=False, PROGRESSING=True or DEGRADED=True
+        echo
+        echo "=== oc get pods -n openshift-machine-config-operator ==="
+        oc get pods -n openshift-machine-config-operator -o wide 2>&1 || true
+    } > "${artifactFile}"
+    : "Wrote spoke upgrade healthcheck diagnostics to ${artifactFile}"  # no-op; only shows up in the set -x trace
+    true
+}
+
+SpokeHealthcheckFailureCleanup() {  # EXIT-trap handler installed by RunSpokeHealthcheck
+    typeset ret=$?  # must stay first: the status in effect when the handler was entered
+    if (( ret != 0 )); then
+        WriteSpokeHealthcheckFailureDiagnostics || true  # diagnostics must never mask the original status
+    fi
+    return "${ret}"
+}
+
+DiscoverSpokeClusters() {
+    typeset -n spokeNamesRef="${1:?}"  # nameref: $1 is the NAME of the caller's array to fill
+    typeset -a rawSpokeNamesArr=()
+    typeset spokeClusterName
+    # Fill the referenced array with the spokes to check; return 1 when none can be determined.
+    spokeNamesRef=()
+    if [[ -n "${ACM_INTEROP_P2P__HEALTHCHECK__SPOKE_CLUSTERS:-}" ]]; then
+        IFS=',' read -r -a rawSpokeNamesArr <<< "${ACM_INTEROP_P2P__HEALTHCHECK__SPOKE_CLUSTERS}"
+        for spokeClusterName in "${rawSpokeNamesArr[@]}"; do
+            spokeClusterName="$(echo -n "${spokeClusterName}" | xargs)"  # xargs trims surrounding whitespace
+            [[ -n "${spokeClusterName}" ]] || {
+                echo "[ERROR] Empty spoke name in ACM_INTEROP_P2P__HEALTHCHECK__SPOKE_CLUSTERS" >&2
+                return 1
+            }
+            spokeNamesRef+=("${spokeClusterName}")
+        done
+        : "Using spoke list from ACM_INTEROP_P2P__HEALTHCHECK__SPOKE_CLUSTERS: ${spokeNamesRef[*]}"
+        return 0
+    fi
+    # No explicit list: query the cluster in the current KUBECONFIG for every ManagedCluster except local-cluster.
+    mapfile -t spokeNamesRef < <(
+        oc get managedcluster \
+            -o jsonpath-as-json='{.items[*].metadata.name}' |
+        jq -r '.[] | select(. != "local-cluster")'
+    )
+    if [[ ${#spokeNamesRef[@]} -eq 0 ]]; then
+        echo "[ERROR] No managed spoke clusters found on hub" >&2
+        return 1
+    fi
+
+    : "Discovered managed spoke clusters: ${spokeNamesRef[*]}"
+    true
+}
+
+ExtractSpokeKubeconfig() {
+    typeset targetSpokeName="${1:?}"  # spoke (ManagedCluster) name; also used as its namespace below
+    typeset spokeKubeconfigPath="${2:?}"  # destination file for the spoke admin kubeconfig
+    typeset adminKubeconfigSecretName
+    typeset managedClusterName
+    # Preferred source: the kubeconfig cached in SHARED_DIR, used only when it belongs to this spoke.
+    if [[ -f "${SHARED_DIR}/managed-cluster-kubeconfig" && -f "${SHARED_DIR}/managed-cluster-name" ]]; then
+        managedClusterName="$(tr -d '[:space:]' < "${SHARED_DIR}/managed-cluster-name")"
+        if [[ "${managedClusterName}" == "${targetSpokeName}" ]]; then
+            cp "${SHARED_DIR}/managed-cluster-kubeconfig" "${spokeKubeconfigPath}"
+            : "Using cached kubeconfig from ${SHARED_DIR}/managed-cluster-kubeconfig for spoke '${targetSpokeName}'"
+            return 0
+        fi
+    fi
+    # Fallback: read the admin kubeconfig secret referenced by the spoke's ClusterDeployment.
+    if ! oc -n "${targetSpokeName}" get "clusterdeployment/${targetSpokeName}" 1>/dev/null; then
+        echo "[ERROR] ClusterDeployment '${targetSpokeName}' not found on hub; cannot resolve admin kubeconfig" >&2
+        return 1
+    fi
+
+    adminKubeconfigSecretName="$(
+        oc -n "${targetSpokeName}" get "clusterdeployment/${targetSpokeName}" \
+            -o jsonpath='{.spec.clusterMetadata.adminKubeconfigSecretRef.name}'
+    )"
+    [[ -n "${adminKubeconfigSecretName}" ]] || {
+        echo "[ERROR] adminKubeconfigSecretRef is empty for spoke '${targetSpokeName}'" >&2
+        return 1
+    }
+
+    oc -n "${targetSpokeName}" get "secret/${adminKubeconfigSecretName}" \
+        -o jsonpath='{.data.kubeconfig}' |
+        base64 -d > "${spokeKubeconfigPath}"
+    # An empty file means the secret had no usable `kubeconfig` key.
+    [[ -s "${spokeKubeconfigPath}" ]] || {
+        echo "[ERROR] Extracted kubeconfig for spoke '${targetSpokeName}' is empty" >&2
+        return 1
+    }
+
+    true
+}
+
+RunSpokeHealthcheck() {
+    typeset targetSpokeName="${1:?}"
+    typeset spokeKubeconfigPath="${2:?}"
+
+    spokeName="${targetSpokeName}"
+    export KUBECONFIG="${spokeKubeconfigPath}"
+    trap SpokeHealthcheckFailureCleanup EXIT
+
+    : "Post-upgrade health check for spoke '${spokeName}'"
+
+    OC="run_command_oc"
+
+    oc get machineconfig || true
+
+    : "Step #1: Make sure no degraded or updating mcp"
+    wait_mcp_continous_success
+
+    : "Step #2: check all cluster operators get stable and ready"
+    wait_clusteroperators_continous_success
+
+    : "Step #3: Make sure every machine is in 'Ready' status"
+    check_node
+
+    : "Step #4: check all pods are in status running or complete"
+    check_pod
+
+    trap - EXIT
+    : "Post-upgrade health check passed for spoke '${spokeName}'"
+    true
+}
+
+function run_command_oc() {  # Run `oc "$@"`, retrying on failure; prints the captured output on success.
+    typeset -i try=0 max=40; typeset ret_val
+    # Called without arguments: nothing to run.
+    if [[ "$#" -lt 1 ]]; then
+        return 0
+    fi
+    # Up to 40 attempts, 3 s apart; stderr is merged into the captured output.
+    while (( try < max )); do
+        if ret_val=$(oc "$@" 2>&1); then
+            break
+        fi
+        (( try += 1 ))
+        sleep 3
+    done
+    # try reaches max only when every attempt failed: report the command and its last output.
+    if (( try == max )); then
+        echo >&2 "Run:[oc $*]"
+        echo >&2 "Get:[$ret_val]"
+        return 255
+    fi
+
+    echo "${ret_val}"
+}
+
+function check_clusteroperators() {  # Returns the number of failed checks (0 = all cluster operators healthy).
+    typeset -i tmp_ret=0; typeset tmp_clusteroperator input column last_column_name tmp_clusteroperator_1 rc line null_version unavailable_operator progressing_operator degraded_operator
+    # Queries go through ${OC} (set by the caller to the retrying oc wrapper); findings are printed to stderr.
+    : "Make sure every operator does not report empty column"
+    tmp_clusteroperator=$(mktemp /tmp/health_check-script.XXXXXX)
+    input="${tmp_clusteroperator}"
+    ${OC} get clusteroperator >"${tmp_clusteroperator}"
+    column=$(head -n 1 "${tmp_clusteroperator}" | awk '{print NF}')
+    last_column_name=$(head -n 1 "${tmp_clusteroperator}" | awk '{print $NF}')
+    if [[ ${last_column_name} == "MESSAGE" ]]; then  # drop the MESSAGE column (presumably free text) before counting fields
+        (( column -= 1 ))
+        tmp_clusteroperator_1=$(mktemp /tmp/health_check-script.XXXXXX)
+        awk -v end="${column}" '{for(i=1;i<=end;i++) printf "%s\t", $i; print ""}' "${tmp_clusteroperator}" > "${tmp_clusteroperator_1}"
+        input="${tmp_clusteroperator_1}"
+    fi
+    # A row whose field count differs from the header's has an empty column.
+    while IFS= read -r line
+    do
+        rc=$(echo "${line}" | awk '{print NF}')
+        if (( rc != column )); then
+            echo >&2 "The following line have empty column"
+            echo >&2 "${line}"
+            (( tmp_ret += 1 ))
+        fi
+    done < "${input}"
+    rm -f "${tmp_clusteroperator}" ${tmp_clusteroperator_1:+"${tmp_clusteroperator_1}"}  # also remove the trimmed copy when it was created
+
+    : "Make sure every operator column reports version"
+    if null_version=$(${OC} get clusteroperator -o json | jq '.items[] | select(.status.versions == null) | .metadata.name') && [[ ${null_version} != "" ]]; then
+        echo >&2 "Null Version: ${null_version}"
+        (( tmp_ret += 1 ))
+    fi
+
+    : "Make sure every operator's AVAILABLE column is True"
+    if unavailable_operator=$(${OC} get clusteroperator | awk '$3 == "False"' | grep "False"); then
+        echo >&2 "Some operator's AVAILABLE is False"
+        echo >&2 "$unavailable_operator"
+        (( tmp_ret += 1 ))
+    fi
+    if ${OC} get clusteroperator -o jsonpath='{.items[].status.conditions[?(@.type=="Available")].status}'| grep -iv "True"; then
+        echo >&2 "Some operators are unavailable, pls run 'oc get clusteroperator -o json' to check"
+        (( tmp_ret += 1 ))
+    fi
+
+    : "Make sure every operator's PROGRESSING column is False"
+    if progressing_operator=$(${OC} get clusteroperator | awk '$4 == "True"' | grep "True"); then
+        echo >&2 "Some operator's PROGRESSING is True"
+        echo >&2 "$progressing_operator"
+        (( tmp_ret += 1 ))
+    fi
+    if ${OC} get clusteroperator -o json | jq '.items[].status.conditions[] | select(.type == "Progressing") | .status' | grep -iv "False"; then
+        echo >&2 "Some operators are Progressing, pls run 'oc get clusteroperator -o json' to check"
+        (( tmp_ret += 1 ))
+    fi
+
+    : "Make sure every operator's DEGRADED column is False"
+    # In disconnected install, openshift-sample often get into Degrade state, so it is better to remove them from cluster from flexy post-action
+    #degraded_operator=$(${OC} get clusteroperator | grep -v "openshift-sample" | awk '$5 == "True"')
+    if degraded_operator=$(${OC} get clusteroperator | awk '$5 == "True"' | grep "True"); then
+        echo >&2 "Some operator's DEGRADED is True"
+        echo >&2 "$degraded_operator"
+        (( tmp_ret += 1 ))
+    fi
+    #co_check=$(${OC} get clusteroperator -o json | jq '.items[] | select(.metadata.name != "openshift-samples") | .status.conditions[] | select(.type == "Degraded") | .status'  | grep -iv 'False')
+    if ${OC} get clusteroperator -o jsonpath='{.items[].status.conditions[?(@.type=="Degraded")].status}'| grep -iv 'False'; then
+        echo >&2 "Some operators are Degraded, pls run 'oc get clusteroperator -o json' to check"
+        (( tmp_ret += 1 ))
+    fi
+
+    return "${tmp_ret}"
+}
+
+function wait_clusteroperators_continous_success() {  # Poll until 3 consecutive CO checks pass; return 1 when the budget runs out first.
+    typeset -i continuousSuccessfulCheck=0 passedCriteria=3
+    typeset -i wMax=$(( ACM_SPOKE_CO_READY_TIMEOUT_MINUTES * 60 )) wInt=60  # total budget and poll interval, in seconds
+    SECONDS=0  # bash's built-in timer; restart it so the budget is measured from here
+    while (( SECONDS < wMax && continuousSuccessfulCheck < passedCriteria )); do
+        : "Checking CO status (${SECONDS}/${wMax}s, consecutive pass ${continuousSuccessfulCheck}/${passedCriteria})"
+        if check_clusteroperators; then
+            (( continuousSuccessfulCheck += 1 ))
+        else
+            : "cluster operators not ready yet, waiting (${SECONDS}/${wMax}s)"
+            continuousSuccessfulCheck=0  # any failed check restarts the streak
+        fi
+        (( continuousSuccessfulCheck >= passedCriteria )) || sleep "${wInt}"  # no pointless sleep after the final pass
+    done
+    if (( continuousSuccessfulCheck < passedCriteria )); then
+        echo >&2 "Some cluster operator does not get ready or not stable"
+        oc get co
+        return 1
+    fi
+    : "All cluster operators status check PASSED"
+    true
+}
+
+function check_mcp() {  # One-shot MachineConfigPool health probe; see the return codes below.
+    typeset updating_mcp unhealthy_mcp tmp_output unhealthy_mcp_names mcp_name
+    # Returns 0 = all pools healthy, 1 = a pool is updating or the oc query produced no output, 2 = a pool is unhealthy.
+    tmp_output=$(mktemp)
+    oc get machineconfigpools -o custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?\(@.type==\"Updating\"\)].status --no-headers > "${tmp_output}" || true
+    if [[ -s "${tmp_output}" ]]; then
+        updating_mcp="$(grep -v "False" "${tmp_output}" || true)"  # rows whose UPDATING value is not False
+        if [[ -n "${updating_mcp}" ]]; then
+            : "Some mcp is updating"
+            echo "${updating_mcp}"
+            rm -f "${tmp_output}"
+            return 1
+        fi
+    else
+        : "Did not run 'oc get machineconfigpools' successfully"
+        rm -f "${tmp_output}"
+        return 1
+    fi
+
+    # Do not check UPDATED on purpose, because some paused mcp would not update itself until unpaused
+    oc get machineconfigpools -o custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?\(@.type==\"Updating\"\)].status,DEGRADED:status.conditions[?\(@.type==\"Degraded\"\)].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount --no-headers > "${tmp_output}" || true
+    if [[ -s "${tmp_output}" ]]; then
+        unhealthy_mcp="$(grep -v 'False.*False.*0' "${tmp_output}" || true)"  # healthy rows read UPDATING=False DEGRADED=False count=0
+        if [[ -n "${unhealthy_mcp}" ]]; then
+            : "Detected unhealthy mcp"
+            echo "${unhealthy_mcp}"
+            : "Real-time detected unhealthy mcp"
+            oc get machineconfigpools -o custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?\(@.type==\"Updating\"\)].status,DEGRADED:status.conditions[?\(@.type==\"Degraded\"\)].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount | grep -v 'False.*False.*0' || true
+            : "Real-time full mcp output"
+            oc get machineconfigpools
+            unhealthy_mcp_names=$(echo "${unhealthy_mcp}" | awk '{print $1}')
+            : "Using oc describe to check status of unhealthy mcp"
+            for mcp_name in ${unhealthy_mcp_names}; do
+                : "Name: ${mcp_name}"
+                oc describe mcp "${mcp_name}" || echo >&2 "oc describe mcp ${mcp_name} failed"
+            done
+            rm -f "${tmp_output}"
+            return 2
+        fi
+    else
+        : "Did not run 'oc get machineconfigpools' successfully"
+        rm -f "${tmp_output}"
+        return 1
+    fi
+    rm -f "${tmp_output}"
+    return 0
+}
+
+function wait_mcp_continous_success() {  # Poll check_mcp until 10 consecutive passes; return 1 on timeout or 5 consecutive unhealthy results.
+    typeset -i nodeCount nodeMcpMinutes totalMcpMinutes wMax wInt=30
+    typeset -i continuousSuccessfulCheck=0 passedCriteria=10  # 5 min × 60 s ÷ 30 s interval
+    typeset -i continuousDegradedCheck=0 degradedCriteria=5
+    typeset -i ret=0
+    nodeCount="$(oc get node -o json | jq '.items | length')"
+    # Spoke post-upgrade MCP budget: max(floor, nodes × minutes-per-node). Cucushift default is 20m/node
+    # with no floor; raised after rehearsal #2065916519451201536 exhausted ~196m on a ~6-node spoke.
+    nodeMcpMinutes=$(( nodeCount * ACM_SPOKE_MCP_MINUTES_PER_NODE ))
+    if (( nodeMcpMinutes > ACM_SPOKE_MCP_READY_TIMEOUT_MINUTES )); then
+        totalMcpMinutes=$nodeMcpMinutes
+    else
+        totalMcpMinutes=$ACM_SPOKE_MCP_READY_TIMEOUT_MINUTES
+    fi
+    wMax=$(( totalMcpMinutes * 60 ))
+    SECONDS=0  # bash's built-in timer; restart it so the budget is measured from here
+    while (( SECONDS < wMax && continuousSuccessfulCheck < passedCriteria )); do
+        : "Checking MCP status (${SECONDS}/${wMax}s, consecutive pass ${continuousSuccessfulCheck}/${passedCriteria})"
+        ret=0
+        check_mcp || ret=$?  # 0 = healthy, 1 = updating / no output, anything else = unhealthy
+        if [[ "${ret}" == "0" ]]; then
+            continuousDegradedCheck=0
+            (( continuousSuccessfulCheck += 1 ))
+        elif [[ "${ret}" == "1" ]]; then
+            : "Some machines are updating, waiting (${SECONDS}/${wMax}s)"
+            continuousSuccessfulCheck=0
+            continuousDegradedCheck=0
+        else
+            continuousSuccessfulCheck=0
+            : "Some machines are degraded (${continuousDegradedCheck}/${degradedCriteria}), waiting (${SECONDS}/${wMax}s)"
+            (( continuousDegradedCheck += 1 ))
+            if (( continuousDegradedCheck >= degradedCriteria )); then
+                break  # persistently unhealthy: stop waiting and fail below
+            fi
+        fi
+        (( continuousSuccessfulCheck >= passedCriteria )) || sleep "${wInt}"  # no pointless sleep after the final pass
+    done
+    if (( continuousSuccessfulCheck < passedCriteria )); then
+        echo >&2 "Some mcp does not get ready or not stable"
+        oc get machineconfigpools
+        return 1
+    fi
+    : "All mcp status check PASSED"
+    true
+}
+
+function check_node() {  # Returns 0 when the Ready-node count equals the total node count, else 1.
+    typeset -i nodeNumber readyNumber
+    nodeNumber="$(
+        oc get node \
+            -o jsonpath-as-json='{.items[*].metadata.name}' |
+        jq 'length'
+    )"
+    readyNumber="$(
+        oc get node -o json |
+        jq '[.items[] | select(.status.conditions[]? | .type == "Ready" and .status == "True")] | length'
+    )"
+    if (( nodeNumber == readyNumber )); then
+        : "All nodes status check PASSED"
+        return 0
+    fi
+    if (( readyNumber == 0 )); then
+        echo >&2 "No any ready node"
+    else
+        echo >&2 "We found failed node"
+        oc get node -o wide  # node listing is printed only in the partially-ready case
+    fi
+    return 1
+}
+
+function check_pod() {  # Informational only: lists pods, evaluates nothing.
+    : "Show all pods status for reference/debug"
+    oc get pods --all-namespaces
+    true
+}
+
+# Setup proxy if it's present in the shared dir
+if test -f "${SHARED_DIR}/proxy-conf.sh"
+then
+    # shellcheck disable=SC1091
+    source "${SHARED_DIR}/proxy-conf.sh"
+fi
+# Spoke discovery runs with the hub kubeconfig.
+export KUBECONFIG="${hubKubeconfig}"
+DiscoverSpokeClusters spokeNamesArr
+# Check the spokes one after another; failures are collected so every spoke gets checked.
+for spokeName in "${spokeNamesArr[@]}"; do
+    export KUBECONFIG="${hubKubeconfig}"  # RunSpokeHealthcheck switches KUBECONFIG to the spoke; switch back
+    spokeName="$(echo -n "${spokeName}" | xargs)"
+    typeset spokeKubeconfigFile
+    spokeKubeconfigFile="$(mktemp /tmp/acm-spoke-healthcheck.XXXXXX.kubeconfig)"
+
+    if ! ExtractSpokeKubeconfig "${spokeName}" "${spokeKubeconfigFile}"; then
+        failedSpokesArr+=("${spokeName}")
+        rm -f "${spokeKubeconfigFile}"
+        continue
+    fi
+
+    if ! RunSpokeHealthcheck "${spokeName}" "${spokeKubeconfigFile}"; then
+        failedSpokesArr+=("${spokeName}")
+    fi
+    # The temporary file holds admin credentials; remove it as soon as the spoke is done.
+    rm -f "${spokeKubeconfigFile}"
+done
+
+export KUBECONFIG="${hubKubeconfig}"
+# Fail the step when any spoke was recorded as failed.
+if [[ ${#failedSpokesArr[@]} -gt 0 ]]; then
+    echo "[ERROR] Post-upgrade health check failed for spoke cluster(s): ${failedSpokesArr[*]}" >&2
+    exit 1
+fi
+
+: "Post-upgrade health check passed for all spoke cluster(s): ${spokeNamesArr[*]}"
+true
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.metadata.json b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.metadata.json
new file mode 100644
index 0000000000000..22db751b1b169
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.metadata.json
@@ -0,0 +1,11 @@
+{
+	"path": "acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.yaml",
+	"owners": {
+		"approvers": [
+			"cspi-qe-ocp-lp"
+		],
+		"reviewers": [
+			"cspi-qe-ocp-lp"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.yaml b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.yaml
new file mode 100644
index 0000000000000..e6879c49d39d7
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-healthcheck/acm-interop-p2p-spoke-upgrade-healthcheck-ref.yaml
@@ -0,0 +1,37 @@
+ref:
+  as: acm-interop-p2p-spoke-upgrade-healthcheck
+  from_image:
+    namespace: ci
+    name: verification-tests
+    tag: latest
+  grace_period: 10m
+  commands: acm-interop-p2p-spoke-upgrade-healthcheck-commands.sh
+  timeout: 5h0m0s
+  cli: latest
+  resources:
+    limits:
+      cpu: "1"
+    requests:
+      cpu: 100m
+      memory: 100Mi
+  env:
+  - name: ACM_INTEROP_P2P__HEALTHCHECK__SPOKE_CLUSTERS
+    default: ""
+    documentation: Optional comma-separated ManagedCluster names. When empty, all managed spokes except local-cluster are checked.
+  - name: ACM_SPOKE_MCP_MINUTES_PER_NODE
+    default: "35"
+    documentation: "Minutes of MCP wait budget per node (cucushift default 20). Rehearsal #2065916519451201536 needed ~33m/node on a 6-node spoke."
+  - name: ACM_SPOKE_MCP_READY_TIMEOUT_MINUTES
+    default: "210"
+    documentation: Minimum MCP wait budget in minutes regardless of node count. The floor sits above the ~196m the failed rehearsal consumed; cucushift's 20m/node budget would give only 120m on 6 nodes.
+  - name: ACM_SPOKE_CO_READY_TIMEOUT_MINUTES
+    default: "45"
+    documentation: Cluster operator stability wait budget in minutes (cucushift default 30). Poll interval is 60s.
+  documentation: |-
+    Post-upgrade health check on ACM managed spoke cluster(s). Uses the same logic as
+    cucushift-upgrade-healthcheck per spoke (MCP, cluster operators, nodes, pods).
+    Discovers spokes from the hub or from ACM_INTEROP_P2P__HEALTHCHECK__SPOKE_CLUSTERS.
+    Reuses ${SHARED_DIR}/managed-cluster-kubeconfig when the spoke name matches
+    ${SHARED_DIR}/managed-cluster-name. Spokes are checked sequentially within the 5h step
+    timeout; raise the step and job timeouts when many spokes are registered.
+    On failure, writes spoke-<name>-upgrade-healthcheck-failure.txt to ${ARTIFACT_DIR}.
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/OWNERS b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/OWNERS
new file mode 100644
index 0000000000000..41d144d3728a2
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/OWNERS
@@ -0,0 +1,3 @@
+approvers: &owners
+- cspi-qe-ocp-lp
+reviewers: *owners
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/README.md b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/README.md
new file mode 100644
index 0000000000000..dab5fedf5141f
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/README.md
@@ -0,0 +1,31 @@
+# ACM Interop P2P Spoke Upgrade Pre-healthcheck
+
+## Purpose
+
+Run cucushift-style pre-upgrade health checks against every ACM managed spoke cluster
+on the hub before `acm-interop-p2p-spoke-upgrade`.
+
+## Process
+
+1. Connect to the ACM hub using `${SHARED_DIR}/kubeconfig`.
+2. Resolve the spoke list from `ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS` when set,
+   or discover all `ManagedCluster` resources except `local-cluster`.
+3. For each spoke, resolve an admin kubeconfig from `${SHARED_DIR}/managed-cluster-kubeconfig`
+   (when the spoke name matches `${SHARED_DIR}/managed-cluster-name`) or from the Hive
+   `ClusterDeployment` admin kubeconfig secret.
+4. Run MCP, ClusterOperator, node, and pod checks on each spoke (same logic as
+   `cucushift-upgrade-prehealthcheck`).
+5. On per-spoke failure, write `spoke-<name>-upgrade-prehealthcheck-failure.txt` to
+   `${ARTIFACT_DIR}`.
+
+## Environment Variables
+
+| Name | Default | Description |
+| --- | --- | --- |
+| `ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS` | empty | Optional comma-separated spoke names. When empty, all managed spokes except `local-cluster` are checked. |
+
+## Requirements
+
+- Hub kubeconfig at `${SHARED_DIR}/kubeconfig`
+- At least one managed spoke registered with ACM
+- Hive-provisioned spokes must expose `ClusterDeployment.spec.clusterMetadata.adminKubeconfigSecretRef`
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-commands.sh b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-commands.sh
new file mode 100755
index 0000000000000..e12e9360dbf18
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-commands.sh
@@ -0,0 +1,454 @@
+#!/bin/bash
+#
+# Pre-upgrade health check on ACM managed spoke cluster(s).
+# Health check logic mirrors cucushift-upgrade-prehealthcheck-commands.sh
+# (ci-operator/step-registry/cucushift/upgrade/prehealthcheck/) run per spoke kubeconfig.
+#
+set -euxo pipefail; shopt -s inherit_errexit
+
+typeset hubKubeconfig="${SHARED_DIR}/kubeconfig"  # kubeconfig of the ACM hub cluster
+typeset spokeName='spoke'  # spoke currently being checked; used in the diagnostics file name
+typeset -a spokeNamesArr=()  # spokes to check, filled by DiscoverSpokeClusters
+typeset -a failedSpokesArr=()  # spokes whose kubeconfig extraction or health check failed
+# Fail fast: nothing below can run without the hub kubeconfig.
+[[ -f "${hubKubeconfig}" ]] || {
+    echo "[ERROR] Hub kubeconfig not found: ${hubKubeconfig}" >&2
+    exit 1
+}
+
+WriteSpokePrehealthcheckFailureDiagnostics() {  # Dump cluster state (via the current KUBECONFIG) into one artifact file
+    typeset artifactFile="${ARTIFACT_DIR}/spoke-${spokeName}-upgrade-prehealthcheck-failure.txt"
+    typeset unhealthyMcp mcpName nodeName coName
+    # Best-effort: every query is followed by `|| true` so one failing command does not stop the dump.
+    {
+        echo "=== oc get clusterversion ==="
+        oc get clusterversion version -o wide 2>&1 || true
+        echo
+        echo "=== oc describe clusterversion version ==="
+        oc describe clusterversion version 2>&1 || true
+        echo
+        echo "=== oc get machineconfigpools ==="
+        oc get machineconfigpools 2>&1 || true
+        echo
+        echo "=== MCP custom-columns (UPDATING/DEGRADED) ==="
+        oc get machineconfigpools \
+            -o 'custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?(@.type=="Updating")].status,DEGRADED:status.conditions[?(@.type=="Degraded")].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount' \
+            2>&1 || true
+        unhealthyMcp="$(oc get machineconfigpools \
+            -o 'custom-columns=NAME:metadata.name,UPDATING:status.conditions[?(@.type=="Updating")].status,DEGRADED:status.conditions[?(@.type=="Degraded")].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount' \
+            --no-headers 2>/dev/null | grep -Ev '[[:space:]]False[[:space:]]+False[[:space:]]+0[[:space:]]*$' || true)"  # keep rows that are not exactly False/False/0
+        if [[ -n "${unhealthyMcp}" ]]; then
+            echo
+            echo "=== oc describe unhealthy MCPs ==="
+            while read -r mcpName _; do
+                [[ -n "${mcpName}" ]] || continue
+                echo "--- ${mcpName} ---"
+                oc describe machineconfigpool "${mcpName}" 2>&1 || true
+            done <<<"${unhealthyMcp}"
+        fi
+        echo
+        echo "=== oc get nodes ==="
+        oc get nodes -o wide 2>&1 || true
+        echo
+        echo "=== oc describe not-Ready nodes ==="
+        while read -r nodeName _; do
+            [[ -n "${nodeName}" ]] || continue
+            echo "--- ${nodeName} ---"
+            oc describe node "${nodeName}" 2>&1 || true
+        done < <(oc get nodes --no-headers 2>/dev/null | awk '$2 != "Ready" {print $1}' || true)  # second column is anything other than exactly "Ready"
+        echo
+        echo "=== oc get clusteroperators ==="
+        oc get clusteroperators 2>&1 || true
+        echo
+        echo "=== oc describe unhealthy clusteroperators ==="
+        while read -r coName _; do
+            [[ -n "${coName}" ]] || continue
+            echo "--- ${coName} ---"
+            oc describe clusteroperator "${coName}" 2>&1 || true
+        done < <(oc get clusteroperators --no-headers 2>/dev/null | awk '$3 == "False" || $4 == "True" || $5 == "True" {print $1}' || true)  # AVAILABLE=False, PROGRESSING=True or DEGRADED=True
+        echo
+        echo "=== oc get pods -n openshift-machine-config-operator ==="
+        oc get pods -n openshift-machine-config-operator -o wide 2>&1 || true
+    } > "${artifactFile}"
+    : "Wrote spoke upgrade prehealthcheck diagnostics to ${artifactFile}"
+    true
+}
+
+SpokePrehealthcheckFailureCleanup() {  # EXIT-trap handler: write diagnostics when the exit status is non-zero
+    typeset ret=$?  # must be the first command so $? is still the status being handled
+    if (( ret != 0 )); then
+        WriteSpokePrehealthcheckFailureDiagnostics || true  # best-effort; never mask the original status
+    fi
+    return "${ret}"
+}
+
+DiscoverSpokeClusters() {  # Fill the caller's array (whose name is passed as $1) with the spoke names to check
+    typeset -n spokeNamesRef="${1:?}"  # nameref to the caller's array
+    typeset -a rawSpokeNamesArr=()
+    typeset spokeClusterName
+    # A non-empty comma-separated list in the environment takes precedence over hub discovery.
+    spokeNamesRef=()
+    if [[ -n "${ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS:-}" ]]; then
+        IFS=',' read -r -a rawSpokeNamesArr <<< "${ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS}"
+        for spokeClusterName in "${rawSpokeNamesArr[@]}"; do
+            spokeClusterName="$(echo -n "${spokeClusterName}" | xargs)"  # strip surrounding whitespace
+            [[ -n "${spokeClusterName}" ]] || {
+                echo "[ERROR] Empty spoke name in ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS" >&2
+                return 1
+            }
+            spokeNamesRef+=("${spokeClusterName}")
+        done
+        : "Using spoke list from ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS: ${spokeNamesRef[*]}"
+        return 0
+    fi
+    # Otherwise list every ManagedCluster visible through the current KUBECONFIG, minus "local-cluster".
+    mapfile -t spokeNamesRef < <(
+        oc get managedcluster \
+            -o jsonpath-as-json='{.items[*].metadata.name}' |
+        jq -r '.[] | select(. != "local-cluster")'
+    )
+    if [[ ${#spokeNamesRef[@]} -eq 0 ]]; then
+        echo "[ERROR] No managed spoke clusters found on hub" >&2
+        return 1
+    fi
+
+    : "Discovered managed spoke clusters: ${spokeNamesRef[*]}"
+    true
+}
+
+ExtractSpokeKubeconfig() {  # Write the admin kubeconfig of spoke $1 to file $2; returns 1 when it cannot be resolved
+    typeset targetSpokeName="${1:?}"
+    typeset spokeKubeconfigPath="${2:?}"
+    typeset adminKubeconfigSecretName
+    typeset managedClusterName
+    # Fast path: reuse the kubeconfig cached in SHARED_DIR when the recorded cluster name is this spoke.
+    if [[ -f "${SHARED_DIR}/managed-cluster-kubeconfig" && -f "${SHARED_DIR}/managed-cluster-name" ]]; then
+        managedClusterName="$(tr -d '[:space:]' < "${SHARED_DIR}/managed-cluster-name")"
+        if [[ "${managedClusterName}" == "${targetSpokeName}" ]]; then
+            cp "${SHARED_DIR}/managed-cluster-kubeconfig" "${spokeKubeconfigPath}"
+            : "Using cached kubeconfig from ${SHARED_DIR}/managed-cluster-kubeconfig for spoke '${targetSpokeName}'"
+            return 0
+        fi
+    fi
+    # Otherwise look up the ClusterDeployment named after the spoke, in the namespace of the same name.
+    if ! oc -n "${targetSpokeName}" get "clusterdeployment/${targetSpokeName}" 1>/dev/null; then
+        echo "[ERROR] ClusterDeployment '${targetSpokeName}' not found on hub; cannot resolve admin kubeconfig" >&2
+        return 1
+    fi
+
+    adminKubeconfigSecretName="$(
+        oc -n "${targetSpokeName}" get "clusterdeployment/${targetSpokeName}" \
+            -o jsonpath='{.spec.clusterMetadata.adminKubeconfigSecretRef.name}'
+    )"
+    [[ -n "${adminKubeconfigSecretName}" ]] || {
+        echo "[ERROR] adminKubeconfigSecretRef is empty for spoke '${targetSpokeName}'" >&2
+        return 1
+    }
+    # Decode the secret's `kubeconfig` data key into the destination file.
+    oc -n "${targetSpokeName}" get "secret/${adminKubeconfigSecretName}" \
+        -o jsonpath='{.data.kubeconfig}' |
+        base64 -d > "${spokeKubeconfigPath}"
+    # The redirection creates the file even when the read yields nothing; reject a zero-length result.
+    [[ -s "${spokeKubeconfigPath}" ]] || {
+        echo "[ERROR] Extracted kubeconfig for spoke '${targetSpokeName}' is empty" >&2
+        return 1
+    }
+
+    true
+}
+
+RunSpokePrehealthcheck() {
+    typeset targetSpokeName="${1:?}"
+    typeset spokeKubeconfigPath="${2:?}"
+
+    spokeName="${targetSpokeName}"
+    export KUBECONFIG="${spokeKubeconfigPath}"
+    trap SpokePrehealthcheckFailureCleanup EXIT
+
+    : "Pre-upgrade health check for spoke '${spokeName}'"
+
+    OC="run_command_oc"
+
+    oc get machineconfig
+
+    : "Step #1: Make sure no degraded or updating mcp"
+    wait_mcp_continous_success
+
+    : "Step #2: check all cluster operators get stable and ready"
+    wait_clusteroperators_continous_success
+
+    : "Step #3: Make sure every machine is in 'Ready' status"
+    check_node
+
+    : "Step #4: check all pods are in status running or complete"
+    check_pod
+
+    trap - EXIT
+    : "Pre-upgrade health check passed for spoke '${spokeName}'"
+    true
+}
+
+function run_command_oc() {
+    # Run `oc "$@"`, retrying on failure: up to 40 attempts with a 3 s pause after each failure.
+    # Success: print that attempt's combined stdout/stderr.
+    # Exhausted: print the command and the last output to stderr and return 255.
+    typeset -i attempt limit=40
+    typeset captured
+
+    # Called without arguments: nothing to run.
+    (( $# >= 1 )) || return 0
+
+    for (( attempt = 0; attempt < limit; attempt++ )); do
+        if captured=$(oc "$@" 2>&1); then
+            echo "${captured}"
+            return
+        fi
+        sleep 3
+    done
+
+    # Every attempt failed.
+    echo >&2 "Run:[oc $*]"
+    echo >&2 "Get:[$captured]"
+    return 255
+}
+
+function check_clusteroperators() {
+    typeset -i tmp_ret=0; typeset tmp_clusteroperator input column last_column_name tmp_clusteroperator_1 rc null_version unavailable_operator progressing_operator degraded_operator line
+    # Returns the number of failed sub-checks (0 = every ClusterOperator looks healthy).
+    : "Make sure every operator does not report empty column"
+    tmp_clusteroperator=$(mktemp /tmp/health_check-script.XXXXXX)
+    input="${tmp_clusteroperator}"
+    ${OC} get clusteroperator >"${tmp_clusteroperator}" || { rm -f "${tmp_clusteroperator}"; return 1; }  # an unreadable listing must not pass
+    column=$(head -n 1 "${tmp_clusteroperator}" | awk '{print NF}')
+    last_column_name=$(head -n 1 "${tmp_clusteroperator}" | awk '{print $NF}')
+    if [[ ${last_column_name} == "MESSAGE" ]]; then
+        (( column -= 1 ))
+        tmp_clusteroperator_1=$(mktemp /tmp/health_check-script.XXXXXX)
+        awk -v end="${column}" '{for(i=1;i<=end;i++) printf $i"\t"; print ""}' "${tmp_clusteroperator}" > "${tmp_clusteroperator_1}"
+        input="${tmp_clusteroperator_1}"
+    fi
+    # Every row must have as many fields as the header (MESSAGE excluded); fewer means an empty column.
+    while IFS= read -r line
+    do
+        rc=$(echo "${line}" | awk '{print NF}')
+        if (( rc != column )); then
+            echo >&2 "The following line have empty column"
+            echo >&2 "${line}"
+            (( tmp_ret += 1 ))
+        fi
+    done < "${input}"
+    rm -f "${tmp_clusteroperator}" ${tmp_clusteroperator_1:+"${tmp_clusteroperator_1}"}  # also remove the MESSAGE-stripped copy
+
+    : "Make sure every operator column reports version"
+    if null_version=$(${OC} get clusteroperator -o json | jq '.items[] | select(.status.versions == null) | .metadata.name') && [[ ${null_version} != "" ]]; then
+        echo >&2 "Null Version: ${null_version}"
+        (( tmp_ret += 1 ))
+    fi
+    # The JSON cross-checks below emit one status per line so `grep -v` can flag a single bad operator.
+    : "Make sure every operator's AVAILABLE column is True"
+    if unavailable_operator=$(${OC} get clusteroperator | awk '$3 == "False"' | grep "False"); then
+        echo >&2 "Some operator's AVAILABLE is False"
+        echo >&2 "$unavailable_operator"
+        (( tmp_ret += 1 ))
+    fi
+    if ${OC} get clusteroperator -o json | jq '.items[].status.conditions[]? | select(.type == "Available") | .status' | grep -iv "True"; then
+        echo >&2 "Some operators are unavailable, pls run 'oc get clusteroperator -o json' to check"
+        (( tmp_ret += 1 ))
+    fi
+
+    : "Make sure every operator's PROGRESSING column is False"
+    if progressing_operator=$(${OC} get clusteroperator | awk '$4 == "True"' | grep "True"); then
+        echo >&2 "Some operator's PROGRESSING is True"
+        echo >&2 "$progressing_operator"
+        (( tmp_ret += 1 ))
+    fi
+    if ${OC} get clusteroperator -o json | jq '.items[].status.conditions[] | select(.type == "Progressing") | .status' | grep -iv "False"; then
+        echo >&2 "Some operators are Progressing, pls run 'oc get clusteroperator -o json' to check"
+        (( tmp_ret += 1 ))
+    fi
+
+    : "Make sure every operator's DEGRADED column is False"
+    if degraded_operator=$(${OC} get clusteroperator | awk '$5 == "True"' | grep "True"); then
+        echo >&2 "Some operator's DEGRADED is True"
+        echo >&2 "$degraded_operator"
+        (( tmp_ret += 1 ))
+    fi
+    if ${OC} get clusteroperator -o json | jq '.items[].status.conditions[]? | select(.type == "Degraded") | .status' | grep -iv 'False'; then
+        echo >&2 "Some operators are Degraded, pls run 'oc get clusteroperator -o json' to check"
+        (( tmp_ret += 1 ))
+    fi
+
+    return "${tmp_ret}"
+}
+
+function wait_clusteroperators_continous_success() {
+    # Poll check_clusteroperators every 60 s until it passes 3 times in a row or 30 min elapse.
+    typeset -i streak=0 needed=3 budget=1800 pause=60  # 30 min (30 iterations × 60 s)
+    SECONDS=0
+    until (( SECONDS >= budget || streak >= needed )); do
+        : "Checking CO status (${SECONDS}/${budget}s, consecutive pass ${streak}/${needed})"
+        if ! check_clusteroperators; then
+            : "cluster operators not ready yet, waiting (${SECONDS}/${budget}s)"
+            streak=0
+        else
+            (( streak += 1 ))
+        fi
+        sleep "${pause}"
+    done
+    if (( streak >= needed )); then
+        : "All cluster operators status check PASSED"
+        return 0
+    fi
+    echo >&2 "Some cluster operator does not get ready or not stable"
+    oc get co
+    return 1
+}
+
+function check_mcp() {
+    typeset updating_mcp unhealthy_mcp tmp_output unhealthy_mcp_names mcp_name
+    # Returns 0 when every pool is settled, 1 when a pool is updating or the query failed, 2 when a pool is unhealthy.
+    tmp_output=$(mktemp)
+    oc get machineconfigpools -o custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?\(@.type==\"Updating\"\)].status --no-headers > "${tmp_output}" || true
+    if [[ -s "${tmp_output}" ]]; then
+        updating_mcp="$(grep -v "False" "${tmp_output}" || true)"
+        if [[ -n "${updating_mcp}" ]]; then
+            : "Some mcp is updating"
+            echo "${updating_mcp}"
+            rm -f "${tmp_output}"
+            return 1
+        fi
+    else
+        : "Did not run 'oc get machineconfigpools' successfully"
+        rm -f "${tmp_output}"
+        return 1
+    fi
+    # Match the three trailing columns exactly; a loose 'False.*False.*0' would accept a degraded count such as 10.
+    oc get machineconfigpools -o custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?\(@.type==\"Updating\"\)].status,DEGRADED:status.conditions[?\(@.type==\"Degraded\"\)].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount --no-headers > "${tmp_output}" || true
+    if [[ -s "${tmp_output}" ]]; then
+        unhealthy_mcp="$(grep -Ev '[[:space:]]False[[:space:]]+False[[:space:]]+0[[:space:]]*$' "${tmp_output}" || true)"
+        if [[ -n "${unhealthy_mcp}" ]]; then
+            : "Detected unhealthy mcp"
+            echo "${unhealthy_mcp}"
+            oc get machineconfigpools -o custom-columns=NAME:metadata.name,CONFIG:spec.configuration.name,UPDATING:status.conditions[?\(@.type==\"Updating\"\)].status,DEGRADED:status.conditions[?\(@.type==\"Degraded\"\)].status,DEGRADEDMACHINECOUNT:status.degradedMachineCount | grep -Ev '[[:space:]]False[[:space:]]+False[[:space:]]+0[[:space:]]*$' || true
+            oc get machineconfigpools
+            unhealthy_mcp_names=$(echo "${unhealthy_mcp}" | awk '{print $1}')
+            for mcp_name in ${unhealthy_mcp_names}; do
+                : "Name: ${mcp_name}"
+                oc describe mcp "${mcp_name}" || echo >&2 "oc describe mcp ${mcp_name} failed"
+            done
+            rm -f "${tmp_output}"
+            return 2
+        fi
+    else
+        : "Did not run 'oc get machineconfigpools' successfully"
+        rm -f "${tmp_output}"
+        return 1
+    fi
+    rm -f "${tmp_output}"
+    return 0
+}
+
+function wait_mcp_continous_success() {
+    # Poll check_mcp every 30 s until 10 consecutive passes, the time budget runs out, or 5 consecutive degraded results.
+    typeset -i nodes budget pause=30 badStreak=0 badLimit=5 rc=0
+    typeset -i okStreak=0 okNeeded=10  # 5 min × 60 s ÷ 30 s interval
+    nodes="$(oc get node -o json | jq '.items | length')"
+    budget=$(( nodes * 20 * 60 ))  # nodes × 20 min × 60 s
+    SECONDS=0
+    until (( SECONDS >= budget || okStreak >= okNeeded )); do
+        : "Checking MCP status (${SECONDS}/${budget}s, consecutive pass ${okStreak}/${okNeeded})"
+        rc=0
+        check_mcp || rc=$?
+        case "${rc}" in
+            0)  # check passed: extend the success streak
+                badStreak=0
+                (( okStreak += 1 ))
+                ;;
+            1)  # check_mcp returned 1: reset both streaks and keep waiting
+                : "Some machines are updating, waiting (${SECONDS}/${budget}s)"
+                okStreak=0 badStreak=0
+                ;;
+            *)  # any other status: count it, give up after badLimit in a row
+                okStreak=0
+                : "Some machines are degraded (${badStreak}/${badLimit}), waiting (${SECONDS}/${budget}s)"
+                (( badStreak += 1 ))
+                (( badStreak < badLimit )) || break
+                ;;
+        esac
+        sleep "${pause}"
+    done
+    if (( okStreak < okNeeded )); then
+        echo >&2 "Some mcp does not get ready or not stable"
+        oc get machineconfigpools
+        return 1
+    fi
+    : "All mcp status check PASSED"
+    true
+}
+
+function check_node() {
+    # Pass only when the cluster lists at least one node and every listed node is Ready=True.
+    typeset -i nodeNumber readyNumber
+    nodeNumber="$(
+        oc get node -o jsonpath-as-json='{.items[*].metadata.name}' | jq 'length'
+    )"
+    readyNumber="$(
+        oc get node -o json |
+        jq '[.items[] | select(.status.conditions[]? | .type == "Ready" and .status == "True")] | length'
+    )"
+    # When errexit is not in effect a failed or empty listing leaves both counts at 0; 0 == 0 must not pass.
+    if (( nodeNumber > 0 && nodeNumber == readyNumber )); then
+        : "All nodes status check PASSED"
+        return 0
+    fi
+    if (( readyNumber == 0 )); then
+        echo >&2 "No any ready node"
+    else
+        echo >&2 "We found failed node"
+        oc get node -o wide
+    fi
+    return 1
+}
+
+function check_pod() {  # Informational only: lists all pods for debugging; no pod state is evaluated
+    : "Show all pods status for reference/debug"
+    oc get pods --all-namespaces
+    true
+}
+
+if test -f "${SHARED_DIR}/proxy-conf.sh"; then
+    # shellcheck disable=SC1091
+    source "${SHARED_DIR}/proxy-conf.sh"
+fi
+# Spoke discovery runs against the hub.
+export KUBECONFIG="${hubKubeconfig}"
+DiscoverSpokeClusters spokeNamesArr
+
+for spokeName in "${spokeNamesArr[@]}"; do
+    export KUBECONFIG="${hubKubeconfig}"  # RunSpokePrehealthcheck switches KUBECONFIG to the spoke; reset it each iteration
+    spokeName="$(echo -n "${spokeName}" | xargs)"
+    typeset spokeKubeconfigFile
+    spokeKubeconfigFile="$(mktemp /tmp/acm-spoke-prehealthcheck.XXXXXX.kubeconfig)"
+    # NOTE: bash ignores errexit inside a function run as an `if` condition, so both helpers must signal failure via their return status.
+    if ! ExtractSpokeKubeconfig "${spokeName}" "${spokeKubeconfigFile}"; then
+        failedSpokesArr+=("${spokeName}")
+        rm -f "${spokeKubeconfigFile}"
+        continue
+    fi
+
+    if ! RunSpokePrehealthcheck "${spokeName}" "${spokeKubeconfigFile}"; then
+        failedSpokesArr+=("${spokeName}")
+    fi
+    # The temporary kubeconfig holds admin credentials; remove it as soon as the spoke is done.
+    rm -f "${spokeKubeconfigFile}"
+done
+
+export KUBECONFIG="${hubKubeconfig}"
+# Every spoke is checked before the step fails, so the error lists all failing spokes.
+if [[ ${#failedSpokesArr[@]} -gt 0 ]]; then
+    echo "[ERROR] Pre-upgrade health check failed for spoke cluster(s): ${failedSpokesArr[*]}" >&2
+    exit 1
+fi
+
+: "Pre-upgrade health check passed for all spoke cluster(s): ${spokeNamesArr[*]}"
+true
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.metadata.json b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.metadata.json
new file mode 100644
index 0000000000000..a7518251af244
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.metadata.json
@@ -0,0 +1,11 @@
+{
+	"path": "acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.yaml",
+	"owners": {
+		"approvers": [
+			"cspi-qe-ocp-lp"
+		],
+		"reviewers": [
+			"cspi-qe-ocp-lp"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.yaml b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.yaml
new file mode 100644
index 0000000000000..e8dbd9e5a7395
--- /dev/null
+++ b/ci-operator/step-registry/acm/interop-p2p/spoke-upgrade-prehealthcheck/acm-interop-p2p-spoke-upgrade-prehealthcheck-ref.yaml
@@ -0,0 +1,28 @@
+ref:
+  as: acm-interop-p2p-spoke-upgrade-prehealthcheck
+  from_image:
+    namespace: ci
+    name: verification-tests
+    tag: latest
+  grace_period: 10m
+  commands: acm-interop-p2p-spoke-upgrade-prehealthcheck-commands.sh
+  timeout: 2h0m0s
+  cli: latest
+  resources:
+    limits:
+      cpu: "1"
+    requests:
+      cpu: 100m
+      memory: 100Mi
+  env:
+  - name: ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS
+    default: ""
+    documentation: Optional comma-separated ManagedCluster names. When empty, all managed spokes except local-cluster are checked.
+  documentation: |-
+    Pre-upgrade health check on ACM managed spoke cluster(s). Uses the same logic as
+    cucushift-upgrade-prehealthcheck per spoke (MCP not updating/degraded, cluster
+    operators stable, all nodes Ready). Discovers spokes from the hub or from
+    ACM_INTEROP_P2P__PREHEALTHCHECK__SPOKE_CLUSTERS. Reuses
+    ${SHARED_DIR}/managed-cluster-kubeconfig when the spoke name matches
+    ${SHARED_DIR}/managed-cluster-name. On failure, writes
+    spoke-<name>-upgrade-prehealthcheck-failure.txt to ${ARTIFACT_DIR}.