From 33be85966e7a2a4681fd79306ceef52637c00714 Mon Sep 17 00:00:00 2001 From: Abhijeet Sadawarte Date: Thu, 4 Jun 2026 21:26:52 +0530 Subject: [PATCH] OCPBUGS-86719: Use zero-downtime rollout strategy for console pods The rollout strategy change (maxUnavailable: 0) makes each deployment rollout take a bit longer since the new pod must be Ready before the old pod is terminated. Across the full test suite this adds enough time to push past the 30m limit, so the e2e test timeout is bumped from 30m to 40m. Assisted-by: Claude Code --- .../subresource/deployment/deployment.go | 33 +++++++--- .../subresource/deployment/deployment_test.go | 62 +++++++++++++++---- test-e2e.sh | 6 +- 3 files changed, 79 insertions(+), 22 deletions(-) diff --git a/pkg/console/subresource/deployment/deployment.go b/pkg/console/subresource/deployment/deployment.go index a9d6491e4..32ac04687 100644 --- a/pkg/console/subresource/deployment/deployment.go +++ b/pkg/console/subresource/deployment/deployment.go @@ -184,13 +184,32 @@ func withAffinity( func withStrategy(deployment *appsv1.Deployment, infrastructureConfig *configv1.Infrastructure) { rollingUpdateParams := &appsv1.RollingUpdateDeployment{} if ShouldDeployHA(infrastructureConfig) { - rollingUpdateParams = &appsv1.RollingUpdateDeployment{ - MaxSurge: &intstr.IntOrString{ - IntVal: int32(3), - }, - MaxUnavailable: &intstr.IntOrString{ - IntVal: int32(1), - }, + topology := infrastructureConfig.Status.ControlPlaneTopology + if topology == configv1.DualReplicaTopologyMode || topology == configv1.HighlyAvailableArbiterMode { + // On 2-node topologies the required pod anti-affinity prevents + // scheduling a surge pod when every eligible node already runs + // a console pod, so maxUnavailable must be >= 1 to allow the + // rollout to make progress. + rollingUpdateParams = &appsv1.RollingUpdateDeployment{ + MaxSurge: &intstr.IntOrString{ + IntVal: int32(1), + }, + MaxUnavailable: &intstr.IntOrString{ + IntVal: int32(1), + }, + } + } else { + // On 3+ node topologies a free node is available for the surge + // pod, so maxUnavailable=0 guarantees zero-downtime rollouts: + // no old pod is terminated until its replacement is ready. + rollingUpdateParams = &appsv1.RollingUpdateDeployment{ + MaxSurge: &intstr.IntOrString{ + IntVal: int32(1), + }, + MaxUnavailable: &intstr.IntOrString{ + IntVal: int32(0), + }, + } } } deployment.Spec.Strategy.RollingUpdate = rollingUpdateParams diff --git a/pkg/console/subresource/deployment/deployment_test.go b/pkg/console/subresource/deployment/deployment_test.go index afd28d53f..444fdbfdc 100644 --- a/pkg/console/subresource/deployment/deployment_test.go +++ b/pkg/console/subresource/deployment/deployment_test.go @@ -264,10 +264,10 @@ func TestDefaultDeployment(t *testing.T) { Type: appsv1.RollingUpdateDeploymentStrategyType, RollingUpdate: &appsv1.RollingUpdateDeployment{ MaxSurge: &intstr.IntOrString{ - IntVal: int32(3), + IntVal: int32(1), }, MaxUnavailable: &intstr.IntOrString{ - IntVal: int32(1), + IntVal: int32(0), }, }, }, @@ -343,10 +343,10 @@ func TestDefaultDeployment(t *testing.T) { Type: appsv1.RollingUpdateDeploymentStrategyType, RollingUpdate: &appsv1.RollingUpdateDeployment{ MaxSurge: &intstr.IntOrString{ - IntVal: int32(3), + IntVal: int32(1), }, MaxUnavailable: &intstr.IntOrString{ - IntVal: int32(1), + IntVal: int32(0), }, }, }, @@ -494,10 +494,10 @@ func TestDefaultDeployment(t *testing.T) { Type: appsv1.RollingUpdateDeploymentStrategyType, RollingUpdate: &appsv1.RollingUpdateDeployment{ MaxSurge: &intstr.IntOrString{ - IntVal: int32(3), + IntVal: int32(1), }, MaxUnavailable: &intstr.IntOrString{ - IntVal: int32(1), + IntVal: int32(0), }, }, }, @@ -1518,11 +1518,21 @@ func TestWithStrategy(t *testing.T) { infrastructureConfigSingleReplica := infrastructureConfigWithTopology(configv1.SingleReplicaTopologyMode, configv1.SingleReplicaTopologyMode) infrastructureConfigExternalTopologyHighlyAvailable := infrastructureConfigWithTopology(configv1.ExternalTopologyMode, configv1.HighlyAvailableTopologyMode) infrastructureConfigExternalTopologySingleReplica := infrastructureConfigWithTopology(configv1.ExternalTopologyMode, configv1.SingleReplicaTopologyMode) + infrastructureConfigDualReplica := infrastructureConfigWithTopology(configv1.DualReplicaTopologyMode, configv1.HighlyAvailableTopologyMode) + infrastructureConfigArbiter := infrastructureConfigWithTopology(configv1.HighlyAvailableArbiterMode, configv1.HighlyAvailableTopologyMode) singleReplicaStrategy := appsv1.RollingUpdateDeployment{} - highAvailStrategy := appsv1.RollingUpdateDeployment{ + zeroDowntimeStrategy := appsv1.RollingUpdateDeployment{ MaxSurge: &intstr.IntOrString{ - IntVal: int32(3), + IntVal: int32(1), + }, + MaxUnavailable: &intstr.IntOrString{ + IntVal: int32(0), + }, + } + constrainedHAStrategy := appsv1.RollingUpdateDeployment{ + MaxSurge: &intstr.IntOrString{ + IntVal: int32(1), }, MaxUnavailable: &intstr.IntOrString{ IntVal: int32(1), @@ -1557,7 +1567,7 @@ func TestWithStrategy(t *testing.T) { want: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ Strategy: appsv1.DeploymentStrategy{ - RollingUpdate: &highAvailStrategy, + RollingUpdate: &zeroDowntimeStrategy, }, }, }, @@ -1585,7 +1595,35 @@ func TestWithStrategy(t *testing.T) { want: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ Strategy: appsv1.DeploymentStrategy{ - RollingUpdate: &highAvailStrategy, + RollingUpdate: &zeroDowntimeStrategy, + }, + }, + }, + }, + { + name: "Test DualReplica Strategy uses maxUnavailable=1", + args: args{ + deployment: &appsv1.Deployment{}, + infrastructureConfig: infrastructureConfigDualReplica, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Strategy: appsv1.DeploymentStrategy{ + RollingUpdate: &constrainedHAStrategy, + }, + }, + }, + }, + { + name: "Test Arbiter Strategy uses maxUnavailable=1", + args: args{ + deployment: &appsv1.Deployment{}, + infrastructureConfig: infrastructureConfigArbiter, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Strategy: appsv1.DeploymentStrategy{ + RollingUpdate: &constrainedHAStrategy, }, }, }, @@ -1891,10 +1929,10 @@ func TestDefaultDownloadsDeployment(t *testing.T) { Type: appsv1.RollingUpdateDeploymentStrategyType, RollingUpdate: &appsv1.RollingUpdateDeployment{ MaxSurge: &intstr.IntOrString{ - IntVal: int32(3), + IntVal: int32(1), }, MaxUnavailable: &intstr.IntOrString{ - IntVal: int32(1), + IntVal: int32(0), }, }, }, diff --git a/test-e2e.sh b/test-e2e.sh index 298bfe9e4..77e9c031b 100755 --- a/test-e2e.sh +++ b/test-e2e.sh @@ -9,7 +9,7 @@ OPENSHIFT_CI=${OPENSHIFT_CI:=false} echo "Running tests..." if [ "$OPENSHIFT_CI" = true ]; then - KUBERNETES_CONFIG=${KUBECONFIG} go test -timeout 30m -v ./test/e2e/ 2>&1 | tee "$ARTIFACT_DIR/test.out" + KUBERNETES_CONFIG=${KUBECONFIG} go test -timeout 40m -v ./test/e2e/ 2>&1 | tee "$ARTIFACT_DIR/test.out" RESULT="${PIPESTATUS[0]}" go-junit-report < "$ARTIFACT_DIR/test.out" > "$ARTIFACT_DIR/junit.xml" @@ -17,8 +17,8 @@ if [ "$OPENSHIFT_CI" = true ]; then exit 255 fi else - echo 'KUBERNETES_CONFIG=${KUBECONFIG} go test -timeout 30m -v ./test/e2e/' - KUBERNETES_CONFIG=${KUBECONFIG} go test -timeout 30m -v ./test/e2e/ + echo 'KUBERNETES_CONFIG=${KUBECONFIG} go test -timeout 40m -v ./test/e2e/' + KUBERNETES_CONFIG=${KUBECONFIG} go test -timeout 40m -v ./test/e2e/ fi echo "Success" \ No newline at end of file