From 2f343bed0092b1c2ab111a13ac5ba87ba8ae347a Mon Sep 17 00:00:00 2001 From: Ondra Kupka Date: Tue, 17 Feb 2026 13:11:14 +0100 Subject: [PATCH] controller/workload: Rework failure detection and condition reporting Rework how the workload controller reports Degraded and Progressing conditions to support scaling without false positives, and improve failure detection using pod-level Ready conditions. Key changes: - Stop consulting deployment's observed vs desired generation for Progressing; instead use the Progressing condition's NewReplicaSetAvailable reason, so scaling does not trigger Progressing=True. - Report ProgressDeadlineExceeded via both DeploymentDegraded and DeploymentProgressing (set to False) when the deployment controller signals a timeout. - Replace container-level failure inspection with pod Ready condition checks gated by per-pod ProgressDeadlineSeconds. Filter out terminating pods to avoid false positives from old ReplicaSets. - Detect flapping Ready conditions: when MinReadySeconds > 0 and a pod keeps toggling Ready without staying stable, flag it as failing once the combined progressDeadline + minReadySeconds window has elapsed. - Move SyncError condition into a defer so it is always set regardless of the return path. - Always call PodContainersStatus when reporting UnavailablePod to include container-level diagnostics in the degraded message. --- .../apiserver/controller/workload/workload.go | 200 ++- .../controller/workload/workload_test.go | 1123 ++++++++++++++--- 2 files changed, 1107 insertions(+), 216 deletions(-) diff --git a/pkg/operator/apiserver/controller/workload/workload.go b/pkg/operator/apiserver/controller/workload/workload.go index 7d031f5eda..ffb169a50d 100644 --- a/pkg/operator/apiserver/controller/workload/workload.go +++ b/pkg/operator/apiserver/controller/workload/workload.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "strings" + "time" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -16,6 +17,7 @@ import ( corev1listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" + "k8s.io/utils/ptr" operatorv1 "github.com/openshift/api/operator/v1" applyoperatorv1 "github.com/openshift/client-go/operator/applyconfigurations/operator/v1" @@ -217,10 +219,7 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o }() if !preconditionsReady { - var message string - for _, err := range errs { - message = message + err.Error() + "\n" - } + message := errMessage(errs) if len(message) == 0 { message = "the operator didn't specify what preconditions are missing" } @@ -248,24 +247,14 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o return kerrors.NewAggregate(errs) } - if len(errs) > 0 { - message := "" - for _, err := range errs { - message = message + err.Error() + "\n" + defer func() { + if len(errs) > 0 { + workloadDegradedCondition = workloadDegradedCondition. + WithStatus(operatorv1.ConditionTrue). + WithReason("SyncError"). + WithMessage(errMessage(errs)) } - workloadDegradedCondition = workloadDegradedCondition. - WithStatus(operatorv1.ConditionTrue). - WithReason("SyncError"). - WithMessage(message) - } else if workload == nil { - workloadDegradedCondition = workloadDegradedCondition. - WithStatus(operatorv1.ConditionTrue). - WithReason("NoDeployment"). - WithMessage(fmt.Sprintf("deployment/%s: could not be retrieved", c.targetNamespace)) - } else { - workloadDegradedCondition = workloadDegradedCondition. 
- WithStatus(operatorv1.ConditionFalse) - } + }() if workload == nil { message := fmt.Sprintf("deployment/%s: could not be retrieved", c.targetNamespace) @@ -284,42 +273,53 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o WithReason("NoDeployment"). WithMessage(message) + workloadDegradedCondition = workloadDegradedCondition. + WithStatus(operatorv1.ConditionTrue). + WithReason("NoDeployment"). + WithMessage(message) + return kerrors.NewAggregate(errs) } + workloadDegradedCondition = workloadDegradedCondition.WithStatus(operatorv1.ConditionFalse) + if workload.Status.AvailableReplicas == 0 { deploymentAvailableCondition = deploymentAvailableCondition. WithStatus(operatorv1.ConditionFalse). WithReason("NoPod"). - WithMessage(fmt.Sprintf("no %s.%s pods available on any node.", workload.Name, c.targetNamespace)) + WithMessage(fmt.Sprintf("no %s.%s pods available on any node", workload.Name, c.targetNamespace)) } else { deploymentAvailableCondition = deploymentAvailableCondition. WithStatus(operatorv1.ConditionTrue). WithReason("AsExpected") } - desiredReplicas := int32(1) - if workload.Spec.Replicas != nil { - desiredReplicas = *(workload.Spec.Replicas) + desiredReplicas := ptr.Deref(workload.Spec.Replicas, 1) + + // Update is done when the deployment controller has reported NewReplicaSetAvailable. + // Checking the current vs. observed generation here is not possible since we don't want to be Progressing on scaling. + progressTimedOutMessage, workloadIsBeingUpdatedTooLong := hasDeploymentTimedOutProgressing(workload.Status) + workloadIsBeingUpdated := !hasDeploymentProgressed(workload.Status) && !workloadIsBeingUpdatedTooLong + + var progressDeadlineExceededMessage string + if workloadIsBeingUpdatedTooLong { + progressDeadlineExceededMessage = fmt.Sprintf("deployment/%s.%s has timed out progressing: %s", workload.Name, c.targetNamespace, progressTimedOutMessage) } - // If the workload is up to date, then we are no longer progressing - workloadAtHighestGeneration := workload.ObjectMeta.Generation == workload.Status.ObservedGeneration - // Update is done when all pods have been updated to the latest revision - // and the deployment controller has reported NewReplicaSetAvailable - workloadIsBeingUpdated := !workloadAtHighestGeneration || !hasDeploymentProgressed(workload.Status) - workloadIsBeingUpdatedTooLong := v1helpers.IsUpdatingTooLong(previousStatus, *deploymentProgressingCondition.Type) - if !workloadAtHighestGeneration { - deploymentProgressingCondition = deploymentProgressingCondition. - WithStatus(operatorv1.ConditionTrue). - WithReason("NewGeneration"). - WithMessage(fmt.Sprintf("deployment/%s.%s: observed generation is %d, desired generation is %d.", workload.Name, c.targetNamespace, workload.Status.ObservedGeneration, workload.ObjectMeta.Generation)) - } else if workloadIsBeingUpdated { + switch { + case workloadIsBeingUpdated: deploymentProgressingCondition = deploymentProgressingCondition. WithStatus(operatorv1.ConditionTrue). WithReason("PodsUpdating"). 
- WithMessage(fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest generation and %d/%d pods are available", workload.Name, c.targetNamespace, workload.Status.UpdatedReplicas, desiredReplicas, workload.Status.AvailableReplicas, desiredReplicas)) - } else { + WithMessage(fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest revision and %d/%d pods are available", workload.Name, c.targetNamespace, workload.Status.UpdatedReplicas, desiredReplicas, workload.Status.AvailableReplicas, desiredReplicas)) + + case workloadIsBeingUpdatedTooLong: + deploymentProgressingCondition = deploymentProgressingCondition. + WithStatus(operatorv1.ConditionFalse). + WithReason("ProgressDeadlineExceeded"). + WithMessage(progressDeadlineExceededMessage) + + default: // Terminating pods don't account for any of the other status fields but // still can exist in a state when they are accepting connections and would // contribute to unexpected behavior when we report Progressing=False. @@ -332,22 +332,42 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o WithReason("AsExpected") } - // During a rollout the default maxSurge (25%) will allow the available - // replicas to temporarily exceed the desired replica count. If this were - // to occur, the operator should not report degraded. - workloadHasAllPodsAvailable := workload.Status.AvailableReplicas >= desiredReplicas - if !workloadHasAllPodsAvailable && (!workloadIsBeingUpdated || workloadIsBeingUpdatedTooLong) { - numNonAvailablePods := desiredReplicas - workload.Status.AvailableReplicas + switch { + case workloadIsBeingUpdatedTooLong: deploymentDegradedCondition = deploymentDegradedCondition. WithStatus(operatorv1.ConditionTrue). - WithReason("UnavailablePod") - podContainersStatus, err := deployment.PodContainersStatus(workload, c.podsLister) + WithReason("ProgressDeadlineExceeded"). + WithMessage(progressDeadlineExceededMessage) + + // The following case handles Degraded when not progressing, particularly on scaling. + case !workloadIsBeingUpdated && workload.Status.AvailableReplicas < desiredReplicas: + hasFailing, err := hasFailingPods(workload, c.podsLister, time.Now()) if err != nil { - podContainersStatus = []string{fmt.Sprintf("failed to get pod containers details: %v", err)} + errs = append(errs, err) } - deploymentDegradedCondition = deploymentDegradedCondition. - WithMessage(fmt.Sprintf("%v of %v requested instances are unavailable for %s.%s (%s)", numNonAvailablePods, desiredReplicas, workload.Name, c.targetNamespace, strings.Join(podContainersStatus, ", "))) - } else { + if hasFailing || workload.Status.AvailableReplicas == 0 { + containerMessages, err := deployment.PodContainersStatus(workload, c.podsLister) + if err != nil { + errs = append(errs, err) + } + var failureDescription string + if len(containerMessages) > 0 { + failureDescription = ` (` + strings.Join(containerMessages, ", ") + `)` + } + + numUnavailable := desiredReplicas - workload.Status.AvailableReplicas + message := fmt.Sprintf("%d of %d requested instances are unavailable for %s.%s%s", numUnavailable, desiredReplicas, workload.Name, c.targetNamespace, failureDescription) + deploymentDegradedCondition = deploymentDegradedCondition. + WithStatus(operatorv1.ConditionTrue). + WithReason("UnavailablePod"). + WithMessage(message) + } else { + deploymentDegradedCondition = deploymentDegradedCondition. + WithStatus(operatorv1.ConditionFalse). 
+ WithReason("AsExpected") + } + + default: deploymentDegradedCondition = deploymentDegradedCondition. WithStatus(operatorv1.ConditionFalse). WithReason("AsExpected") @@ -356,8 +376,11 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o // if the deployment is all available and at the expected generation, then update the version to the latest // when we update, the image pull spec should immediately be different, which should immediately cause a deployment rollout // which should immediately result in a deployment generation diff, which should cause this block to be skipped until it is ready. - workloadHasAllPodsUpdated := workload.Status.UpdatedReplicas == desiredReplicas - if workloadAtHighestGeneration && workloadHasAllPodsAvailable && workloadHasAllPodsUpdated && operatorConfigAtHighestGeneration { + if operatorConfigAtHighestGeneration && + workload.ObjectMeta.Generation == workload.Status.ObservedGeneration && + workload.Status.AvailableReplicas == desiredReplicas && + workload.Status.UpdatedReplicas == desiredReplicas { + c.versionRecorder.SetVersion(c.constructOperandNameFor(workload.Name), c.targetOperandVersion) } @@ -386,6 +409,77 @@ func hasDeploymentProgressed(status appsv1.DeploymentStatus) bool { return false } +// hasDeploymentTimedOutProgressing returns true if the deployment reports ProgressDeadlineExceeded. +// The function returns the Progressing condition message as the first return value. +func hasDeploymentTimedOutProgressing(status appsv1.DeploymentStatus) (string, bool) { + for _, cond := range status.Conditions { + if cond.Type == appsv1.DeploymentProgressing { + return cond.Message, cond.Status == corev1.ConditionFalse && cond.Reason == "ProgressDeadlineExceeded" + } + } + return "", false +} + +func hasFailingPods(workload *appsv1.Deployment, podsLister corev1listers.PodLister, now time.Time) (bool, error) { + selector, err := metav1.LabelSelectorAsSelector(workload.Spec.Selector) + if err != nil { + return false, err + } + pods, err := podsLister.Pods(workload.Namespace).List(selector) + if err != nil { + return false, err + } + + progressDeadline := time.Duration(ptr.Deref(workload.Spec.ProgressDeadlineSeconds, 600)) * time.Second + minReady := time.Duration(workload.Spec.MinReadySeconds) * time.Second + + for _, pod := range pods { + if pod.DeletionTimestamp != nil { + continue + } + + readyCond := findPodReadyCondition(pod) + deadline := pod.CreationTimestamp.Time.Add(progressDeadline) + + if (readyCond == nil || readyCond.Status != corev1.ConditionTrue) && now.After(deadline) { + return true, nil + } + + // Detect flapping Ready condition: the pod is currently Ready but its + // Ready condition transitioned too recently to count as available + // (hasn't stayed continuously ready for MinReadySeconds). + // + // Make the check only relevant after ProgressDeadlineSeconds + minReadySeconds. 
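+		//
+		// Example (hypothetical numbers): with the default ProgressDeadlineSeconds
+		// of 600 and MinReadySeconds=60, a pod created at t=0 is only inspected
+		// after t=660s; a Ready condition that last transitioned less than 60s
+		// before the check then counts as flapping, while one that has been
+		// stable for at least 60s does not.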
+ if minReady > 0 && readyCond != nil && readyCond.Status == corev1.ConditionTrue { + isRelevant := now.After(pod.CreationTimestamp.Time.Add(progressDeadline + minReady)) + if isRelevant && now.Sub(readyCond.LastTransitionTime.Time) < minReady { + return true, nil + } + } + } + return false, nil +} + +func findPodReadyCondition(pod *corev1.Pod) *corev1.PodCondition { + for i := range pod.Status.Conditions { + if pod.Status.Conditions[i].Type == corev1.PodReady { + return &pod.Status.Conditions[i] + } + } + return nil +} + +func errMessage(errs []error) string { + var b strings.Builder + for i, err := range errs { + if i > 0 { + b.WriteString("\n") + } + b.WriteString(err.Error()) + } + return b.String() +} + // EnsureAtMostOnePodPerNode updates the deployment spec to prevent more than // one pod of a given replicaset from landing on a node. It accomplishes this // by adding a label on the template and updates the pod anti-affinity term to include that label. diff --git a/pkg/operator/apiserver/controller/workload/workload_test.go b/pkg/operator/apiserver/controller/workload/workload_test.go index fb6d1b8c97..2c15fea760 100644 --- a/pkg/operator/apiserver/controller/workload/workload_test.go +++ b/pkg/operator/apiserver/controller/workload/workload_test.go @@ -6,6 +6,8 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" corev1listers "k8s.io/client-go/listers/core/v1" @@ -57,13 +59,15 @@ func TestUpdateOperatorStatus(t *testing.T) { workload *appsv1.Deployment pods []*corev1.Pod + podListErr error operatorConfigAtHighestRevision bool operatorPreconditionsNotReady bool preconditionError error errors []error previousConditions []operatorv1.OperatorCondition - validateOperatorStatus func(*operatorv1.OperatorStatus) error + validateOperatorStatus func(*operatorv1.OperatorStatus) error + validateVersionRecorder func(*fakeVersionRecorder) error }{ { name: "scenario: no workload, no errors thus we are degraded and we are progressing", @@ -112,7 +116,7 @@ func TestUpdateOperatorStatus(t *testing.T) { { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), Status: operatorv1.ConditionTrue, - Message: "nasty error\n", + Message: "nasty error", Reason: "SyncError", }, { @@ -133,14 +137,13 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: we have an unavailable workload being updated for too long and no errors thus we are degraded", + name: "scenario: unavailable workload with progress deadline exceeded", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", Namespace: "openshift-apiserver", }, Spec: appsv1.DeploymentSpec{ - Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}}, Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ @@ -152,7 +155,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, pods: []*corev1.Pod{ { - ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver", Labels: map[string]string{"foo": "bar"}}, + ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver"}, Status: corev1.PodStatus{ Phase: corev1.PodPending, ContainerStatuses: []corev1.ContainerStatus{ @@ -184,7 +187,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), Status: operatorv1.ConditionFalse, Reason: "NoPod", - Message: "no apiserver.openshift-apiserver pods available on any 
node.", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), @@ -193,28 +196,27 @@ func TestUpdateOperatorStatus(t *testing.T) { { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), Status: operatorv1.ConditionTrue, - Reason: "UnavailablePod", - Message: "3 of 3 requested instances are unavailable for apiserver.openshift-apiserver (container is waiting in pending apiserver pod)", + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: timed out", }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest generation and 0/3 pods are available", + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: timed out", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "scenario: we have an unavailable workload being updated for a short time and no errors so we are progressing", + name: "scenario: unavailable workload progressing normally", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", Namespace: "openshift-apiserver", }, Spec: appsv1.DeploymentSpec{ - Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}}, Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ @@ -226,7 +228,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, pods: []*corev1.Pod{ { - ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver", Labels: map[string]string{"foo": "bar"}}, + ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver"}, Status: corev1.PodStatus{ Phase: corev1.PodPending, ContainerStatuses: []corev1.ContainerStatus{ @@ -235,8 +237,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Ready: false, State: corev1.ContainerState{ Waiting: &corev1.ContainerStateWaiting{ - Reason: "ImagePull", - Message: "slow registry", + Reason: "ContainerCreating", }, }, }, @@ -258,7 +259,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), Status: operatorv1.ConditionFalse, Reason: "NoPod", - Message: "no apiserver.openshift-apiserver pods available on any node.", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), @@ -273,26 +274,27 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), Status: operatorv1.ConditionTrue, Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest generation and 0/3 pods are available", + Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest revision and 0/3 pods are available", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "scenario: we have an incomplete workload and no errors thus we are available and degraded (missing 1 replica)", + name: "scenario: unavailable workload that previously progressed successfully", workload: 
&appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "apiserver", - Namespace: "openshift-apiserver", + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, }, Spec: appsv1.DeploymentSpec{ - Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}}, Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 2, - UpdatedReplicas: 3, + AvailableReplicas: 0, + UpdatedReplicas: 3, + ObservedGeneration: 5, Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, }, @@ -300,20 +302,14 @@ func TestUpdateOperatorStatus(t *testing.T) { }, pods: []*corev1.Pod{ { - ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver"}, + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-1", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-20 * time.Minute))}, Status: corev1.PodStatus{ - Phase: corev1.PodSucceeded, + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionFalse}, + }, ContainerStatuses: []corev1.ContainerStatus{ - { - Name: "test", - Ready: true, - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - Reason: "PodKilled", - Message: "john wick was here", - }, - }, - }, + {Name: "apiserver", Ready: false, RestartCount: 8}, }, }, }, @@ -322,9 +318,9 @@ func TestUpdateOperatorStatus(t *testing.T) { expectedConditions := []operatorv1.OperatorCondition{ { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), - Status: operatorv1.ConditionTrue, - Reason: "AsExpected", - Message: "", + Status: operatorv1.ConditionFalse, + Reason: "NoPod", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), @@ -334,20 +330,19 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), Status: operatorv1.ConditionTrue, Reason: "UnavailablePod", - Message: "1 of 3 requested instances are unavailable for apiserver.openshift-apiserver ()", + Message: "3 of 3 requested instances are unavailable for apiserver.openshift-apiserver (container is crashlooping in apiserver-1 pod)", }, { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - Message: "", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "scenario: we have a complete workload and no errors thus we are available", + name: "scenario: partially available workload with failing pod", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -357,13 +352,39 @@ func TestUpdateOperatorStatus(t *testing.T) { Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 3, + AvailableReplicas: 2, UpdatedReplicas: 3, Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: 
"NewReplicaSetAvailable", Message: "has successfully progressed"}, }, }, }, + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-ready", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-20 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue}, + }, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "test", Ready: true}, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-crash", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-20 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionFalse}, + }, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "test", Ready: false, RestartCount: 5}, + }, + }, + }, + }, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ { @@ -378,9 +399,9 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - Message: "", + Status: operatorv1.ConditionTrue, + Reason: "UnavailablePod", + Message: "1 of 3 requested instances are unavailable for apiserver.openshift-apiserver (container is crashlooping in apiserver-crash pod)", }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), @@ -393,22 +414,18 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: we have an outdated (generation) workload and no errors thus we are available and we are progressing", + name: "scenario: we have a complete workload and no errors thus we are available", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "apiserver", - Namespace: "openshift-apiserver", - Generation: 100, + Name: "apiserver", + Namespace: "openshift-apiserver", }, Spec: appsv1.DeploymentSpec{ Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - Replicas: 3, - ReadyReplicas: 3, - AvailableReplicas: 3, - UpdatedReplicas: 3, - ObservedGeneration: 99, + AvailableReplicas: 3, + UpdatedReplicas: 3, Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, }, @@ -434,17 +451,16 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "NewGeneration", - Message: "deployment/apiserver.openshift-apiserver: observed generation is 99, desired generation is 100.", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, - { - name: "scenario: rare case when we have an outdated (generation) workload and one old replica failing is but it will be picked up soon by the new rollout thus we are available and we are progressing", + name: "scenario: workload scaling with generation mismatch", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -452,12 +468,12 @@ func TestUpdateOperatorStatus(t *testing.T) { Generation: 100, }, Spec: 
appsv1.DeploymentSpec{ - Replicas: ptr.To[int32](3), + Replicas: ptr.To[int32](5), }, Status: appsv1.DeploymentStatus{ Replicas: 3, - ReadyReplicas: 2, - AvailableReplicas: 2, + ReadyReplicas: 3, + AvailableReplicas: 3, UpdatedReplicas: 3, ObservedGeneration: 99, Conditions: []appsv1.DeploymentCondition{ @@ -485,66 +501,116 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "NewGeneration", - Message: "deployment/apiserver.openshift-apiserver: observed generation is 99, desired generation is 100.", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "preconditions not fulfilled", - operatorPreconditionsNotReady: true, + name: "scenario: partially available during scale-up, pods starting", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 100, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 1, + UpdatedReplicas: 1, + ObservedGeneration: 99, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-new-1", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-1 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionFalse}, + }, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "test", Ready: false, RestartCount: 0, State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "ContainerCreating"}}}, + }, + }, + }, + }, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), - Status: operatorv1.ConditionFalse, - Reason: "PreconditionNotFulfilled", - Message: "", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", }, { - Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionTrue, - Reason: "PreconditionNotFulfilled", - Message: "the operator didn't specify what preconditions are missing", + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, }, { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "PreconditionNotFulfilled", - Message: "", + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", }, { - Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), - Status: operatorv1.ConditionTrue, - Reason: "PreconditionNotFulfilled", - Message: "the operator didn't specify what preconditions are missing", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", }, } return 
areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "the deployment is progressing to rollout pods, but not all replicas have been updated yet", - operatorPreconditionsNotReady: false, + name: "scenario: partially available during scale-up, new pods failing", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", Namespace: "openshift-apiserver", - Generation: 2, + Generation: 100, }, Spec: appsv1.DeploymentSpec{ - Replicas: ptr.To[int32](3), + Replicas: ptr.To[int32](5), }, Status: appsv1.DeploymentStatus{ - ReadyReplicas: 2, - AvailableReplicas: 2, - UpdatedReplicas: 1, - ObservedGeneration: 2, + AvailableReplicas: 3, + UpdatedReplicas: 3, + ObservedGeneration: 99, Conditions: []appsv1.DeploymentCondition{ - {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-fail-1", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-20 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionFalse}, + }, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "test", Ready: false, RestartCount: 3}, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-fail-2", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-20 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionFalse}, + }, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "test", Ready: false, RestartCount: 3}, + }, }, }, }, @@ -562,22 +628,22 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - Message: "", + Status: operatorv1.ConditionTrue, + Reason: "UnavailablePod", + Message: "2 of 5 requested instances are unavailable for apiserver.openshift-apiserver (container is crashlooping in apiserver-fail-1 pod, container is crashlooping in apiserver-fail-2 pod)", }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 1/3 pods have been updated to the latest generation and 2/3 pods are available", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "progressing==false for a longer time shouldn't make the otherwise fine workload degraded", + name: "scenario: partially available during active rollout, pods starting", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -587,19 +653,22 @@ func TestUpdateOperatorStatus(t *testing.T) { Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 3, - UpdatedReplicas: 3, + AvailableReplicas: 2, + UpdatedReplicas: 1, Conditions: []appsv1.DeploymentCondition{ - {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: 
metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "ReplicaSetUpdated", Message: "progressing"}, }, }, }, - previousConditions: []operatorv1.OperatorCondition{ + pods: []*corev1.Pod{ { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)), + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-new", Namespace: "openshift-apiserver"}, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "test", Ready: false, RestartCount: 0, State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "ContainerCreating"}}}, + }, + }, }, }, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { @@ -619,42 +688,48 @@ func TestUpdateOperatorStatus(t *testing.T) { Reason: "AsExpected", }, { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/apiserver.openshift-apiserver: 1/3 pods have been updated to the latest revision and 2/3 pods are available", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "some pods rolled out and waiting for old terminating pod before we can progress further", + name: "scenario: zero available replicas, no pods exist", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "apiserver", - Namespace: "openshift-apiserver", + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, }, Spec: appsv1.DeploymentSpec{ - Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, - Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}}, Replicas: ptr.To[int32](3), + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "apiserver"}, + }, + }, }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 2, - ReadyReplicas: 2, - UpdatedReplicas: 3, + AvailableReplicas: 0, + UpdatedReplicas: 0, + ObservedGeneration: 5, Conditions: []appsv1.DeploymentCondition{ - {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, }, }, }, + pods: []*corev1.Pod{}, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), - Status: operatorv1.ConditionTrue, - Reason: "AsExpected", - Message: "", + Status: operatorv1.ConditionFalse, + Reason: "NoPod", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), @@ -662,58 +737,743 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: 
operatorv1.ConditionFalse, - Reason: "AsExpected", - Message: "", + Status: operatorv1.ConditionTrue, + Reason: "UnavailablePod", + Message: `3 of 3 requested instances are unavailable for apiserver.openshift-apiserver (no pods found with labels "app=apiserver")`, }, { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 3/3 pods have been updated to the latest generation and 2/3 pods are available", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, - } - - for _, scenario := range scenarios { - t.Run(scenario.name, func(t *testing.T) { - // setup - fakeOperatorClient := v1helpers.NewFakeOperatorClient( - &operatorv1.OperatorSpec{ - ManagementState: operatorv1.Managed, + { + name: "scenario: pod list error", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", }, - &operatorv1.OperatorStatus{ - Conditions: scenario.previousConditions, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), }, - nil, - ) - targetNs := "" - if scenario.workload != nil { - targetNs = scenario.workload.Namespace - } - - delegate := &testDelegate{ - preconditionReady: !scenario.operatorPreconditionsNotReady, - preconditionErr: scenario.preconditionError, - - syncWorkload: scenario.workload, - syncIsAtHighestRevision: scenario.operatorConfigAtHighestRevision, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 3, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, + }, + }, + }, + podListErr: fmt.Errorf("fake list error"), + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "SyncError", + Message: "fake list error", + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "scenario: terminating pod past deadline is not reported as failing", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 3, + ObservedGeneration: 5, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver-old", + Namespace: 
"openshift-apiserver", + CreationTimestamp: metav1.NewTime(time.Now().Add(-20 * time.Minute)), + DeletionTimestamp: ptr.To(metav1.NewTime(time.Now().Add(-1 * time.Minute))), + DeletionGracePeriodSeconds: ptr.To[int64](30), + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionFalse}, + }, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "test", Ready: false, RestartCount: 10}, + }, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + // MinReadySeconds=60, ProgressDeadlineSeconds defaults to 600. + // Combined deadline: 600+60 = 660s (11m). + // Pod created 15m ago → past combined deadline → check is relevant. + // LastTransitionTime 10s ago < MinReadySeconds (60s) → flapping → degraded. + name: "scenario: pod with flapping Ready condition after successful rollout detected as failing", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + MinReadySeconds: 60, + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 3, + ObservedGeneration: 5, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-flap", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-15 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue, LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Second))}, + }, + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "apiserver", Ready: true, RestartCount: 12}, + }, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "UnavailablePod", + Message: "1 of 3 requested instances are unavailable for apiserver.openshift-apiserver", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + } + 
return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + // MinReadySeconds=300, ProgressDeadlineSeconds defaults to 600. + // Combined deadline: 600+300 = 900s (15m). + // Pod created 8m ago → still within combined deadline → check not relevant → not degraded. + name: "scenario: pod with flapping Ready condition after successful rollout within combined deadline not flagged", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + MinReadySeconds: 300, + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 3, + ObservedGeneration: 5, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-1", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-8 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue, LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Second))}, + }, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + // MinReadySeconds=60, ProgressDeadlineSeconds defaults to 600. + // Combined deadline: 600+60 = 660s (11m). + // Pod created 20m ago → past combined deadline → check is relevant. + // LastTransitionTime 5m ago > MinReadySeconds (60s) → stable → not degraded. + name: "scenario: stably ready pod after successful rollout past combined deadline not flagged as flapping", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + MinReadySeconds: 60, + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 3, + ObservedGeneration: 5, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-1", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-20 * time.Minute))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue, LastTransitionTime: metav1.NewTime(time.Now().Add(-5 * time.Minute))}, + }, + }, + }, + // Young pod within combined deadline. 
This makes AvailableReplicas count realistic, + // because the steady pod is available. But we need AvailableReplicas < desired, + // otherwise the desired code path is not hit. + { + ObjectMeta: metav1.ObjectMeta{Name: "apiserver-2", Namespace: "openshift-apiserver", CreationTimestamp: metav1.NewTime(time.Now().Add(-10 * time.Second))}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + {Type: corev1.PodReady, Status: corev1.ConditionTrue, LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Second))}, + }, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "preconditions not fulfilled", + operatorPreconditionsNotReady: true, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionFalse, + Reason: "PreconditionNotFulfilled", + Message: "", + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "PreconditionNotFulfilled", + Message: "the operator didn't specify what preconditions are missing", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "PreconditionNotFulfilled", + Message: "", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "PreconditionNotFulfilled", + Message: "the operator didn't specify what preconditions are missing", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "the deployment is progressing to rollout pods, but not all replicas have been updated yet", + operatorPreconditionsNotReady: false, + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 2, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + ReadyReplicas: 2, + AvailableReplicas: 2, + UpdatedReplicas: 1, + ObservedGeneration: 2, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: 
operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/apiserver.openshift-apiserver: 1/3 pods have been updated to the latest revision and 2/3 pods are available", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "scenario: all pods updated but not all available yet", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + ReadyReplicas: 2, + UpdatedReplicas: 3, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/apiserver.openshift-apiserver: 3/3 pods have been updated to the latest revision and 2/3 pods are available", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "scenario: available workload with progress deadline exceeded", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 2, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 1, + ObservedGeneration: 2, + Conditions: []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentProgressing, + Status: corev1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment has timed out", + LastUpdateTime: metav1.Now(), + LastTransitionTime: metav1.Now(), + }, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: 
"ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: deployment has timed out", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: deployment has timed out", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "scenario: workload rollout with maxSurge (4 of 3 replicas available)", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + Replicas: 4, + AvailableReplicas: 4, + UpdatedReplicas: 2, + ObservedGeneration: 5, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/apiserver.openshift-apiserver: 2/3 pods have been updated to the latest revision and 4/3 pods are available", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "scenario: workload recovering from progress deadline exceeded", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 3, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 3, + UpdatedReplicas: 3, + ObservedGeneration: 3, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, + }, + }, + }, + previousConditions: []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment has timed out", + LastTransitionTime: metav1.NewTime(time.Now().Add(-5 * time.Minute)), + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "ProgressDeadlineExceeded", + LastTransitionTime: metav1.NewTime(time.Now().Add(-5 * time.Minute)), + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + 
+		{
+			name: "scenario: workload recovering from progress deadline exceeded",
+			workload: &appsv1.Deployment{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:       "apiserver",
+					Namespace:  "openshift-apiserver",
+					Generation: 3,
+				},
+				Spec: appsv1.DeploymentSpec{
+					Replicas: ptr.To[int32](3),
+				},
+				Status: appsv1.DeploymentStatus{
+					AvailableReplicas:  3,
+					UpdatedReplicas:    3,
+					ObservedGeneration: 3,
+					Conditions: []appsv1.DeploymentCondition{
+						{Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"},
+					},
+				},
+			},
+			previousConditions: []operatorv1.OperatorCondition{
+				{
+					Type:               fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing),
+					Status:             operatorv1.ConditionFalse,
+					Reason:             "ProgressDeadlineExceeded",
+					Message:            "deployment has timed out",
+					LastTransitionTime: metav1.NewTime(time.Now().Add(-5 * time.Minute)),
+				},
+				{
+					Type:               fmt.Sprintf("%sDeploymentDegraded", defaultControllerName),
+					Status:             operatorv1.ConditionTrue,
+					Reason:             "ProgressDeadlineExceeded",
+					LastTransitionTime: metav1.NewTime(time.Now().Add(-5 * time.Minute)),
+				},
+			},
+			validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error {
+				expectedConditions := []operatorv1.OperatorCondition{
+					{
+						Type:   fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable),
+						Status: operatorv1.ConditionTrue,
+						Reason: "AsExpected",
+					},
+					{
+						Type:   fmt.Sprintf("%sWorkloadDegraded", defaultControllerName),
+						Status: operatorv1.ConditionFalse,
+					},
+					{
+						Type:   fmt.Sprintf("%sDeploymentDegraded", defaultControllerName),
+						Status: operatorv1.ConditionFalse,
+						Reason: "AsExpected",
+					},
+					{
+						Type:   fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing),
+						Status: operatorv1.ConditionFalse,
+						Reason: "AsExpected",
+					},
+				}
+				return areCondidtionsEqual(expectedConditions, actualStatus.Conditions)
+			},
+		},
+		{
+			name:                            "version recorded when at highest revision",
+			operatorConfigAtHighestRevision: true,
+			workload: &appsv1.Deployment{
+				ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver", Generation: 1},
+				Spec:       appsv1.DeploymentSpec{Replicas: ptr.To[int32](1)},
+				Status: appsv1.DeploymentStatus{
+					AvailableReplicas: 1, UpdatedReplicas: 1, ObservedGeneration: 1,
+					Conditions: []appsv1.DeploymentCondition{
+						{Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"},
+					},
+				},
+			},
+			validateOperatorStatus:  func(*operatorv1.OperatorStatus) error { return nil },
+			validateVersionRecorder: expectVersionRecorded,
+		},
+		{
+			name:                            "version not recorded when not at highest revision",
+			operatorConfigAtHighestRevision: false,
+			workload: &appsv1.Deployment{
+				ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver", Generation: 1},
+				Spec:       appsv1.DeploymentSpec{Replicas: ptr.To[int32](1)},
+				Status: appsv1.DeploymentStatus{
+					AvailableReplicas: 1, UpdatedReplicas: 1, ObservedGeneration: 1,
+					Conditions: []appsv1.DeploymentCondition{
+						{Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"},
+					},
+				},
+			},
+			validateOperatorStatus:  func(*operatorv1.OperatorStatus) error { return nil },
+			validateVersionRecorder: expectVersionNotRecorded,
+		},
+		{
+			name:                            "version not recorded when generation != observed generation",
+			operatorConfigAtHighestRevision: true,
+			workload: &appsv1.Deployment{
+				ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver", Generation: 2},
+				Spec:       appsv1.DeploymentSpec{Replicas: ptr.To[int32](1)},
+				Status: appsv1.DeploymentStatus{
+					AvailableReplicas: 1, UpdatedReplicas: 1, ObservedGeneration: 1,
+					Conditions: []appsv1.DeploymentCondition{
+						{Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"},
+					},
+				},
+			},
+			validateOperatorStatus:  func(*operatorv1.OperatorStatus) error { return nil },
+			validateVersionRecorder: expectVersionNotRecorded,
+		},
+		{
+			name:                            "version not recorded when available replicas < desired",
+			operatorConfigAtHighestRevision: true,
+			workload: &appsv1.Deployment{
+				ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver", Generation: 1},
+				Spec:       appsv1.DeploymentSpec{Replicas: ptr.To[int32](3)},
+				Status: appsv1.DeploymentStatus{
+					AvailableReplicas: 2, UpdatedReplicas: 3, ObservedGeneration: 1,
+					Conditions: []appsv1.DeploymentCondition{
+						{Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"},
+					},
+				},
+			},
+			validateOperatorStatus:  func(*operatorv1.OperatorStatus) error { return nil },
+			validateVersionRecorder: expectVersionNotRecorded,
+		},
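+		// As above, but the rollout is incomplete because updated replicas lag behind
+		// the desired count, so the operand version must not be recorded either.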
desired", + operatorConfigAtHighestRevision: true, + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "apiserver", Namespace: "openshift-apiserver", Generation: 1}, + Spec: appsv1.DeploymentSpec{Replicas: ptr.To[int32](3)}, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 3, UpdatedReplicas: 2, ObservedGeneration: 1, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + validateOperatorStatus: func(*operatorv1.OperatorStatus) error { return nil }, + validateVersionRecorder: expectVersionNotRecorded, + }, + } + + for _, scenario := range scenarios { + t.Run(scenario.name, func(t *testing.T) { + // setup + fakeOperatorClient := v1helpers.NewFakeOperatorClient( + &operatorv1.OperatorSpec{ + ManagementState: operatorv1.Managed, + }, + &operatorv1.OperatorStatus{ + Conditions: scenario.previousConditions, + }, + nil, + ) + targetNs := "" + if scenario.workload != nil { + targetNs = scenario.workload.Namespace + } + + delegate := &testDelegate{ + preconditionReady: !scenario.operatorPreconditionsNotReady, + preconditionErr: scenario.preconditionError, + + syncWorkload: scenario.workload, + syncIsAtHighestRevision: scenario.operatorConfigAtHighestRevision, syncErrrors: scenario.errors, } + recorder := &fakeVersionRecorder{} + // act target := &Controller{ - operatorClient: fakeOperatorClient, - targetNamespace: targetNs, - podsLister: &fakePodLister{pods: scenario.pods}, - delegate: delegate, + operatorClient: fakeOperatorClient, + targetNamespace: targetNs, + targetOperandVersion: "v1.0.0-test", + podsLister: &fakePodLister{pods: scenario.pods, err: scenario.podListErr}, + delegate: delegate, + versionRecorder: recorder, } err := target.sync(context.TODO(), factory.NewSyncContext("workloadcontroller_test", events.NewInMemoryRecorder("workloadcontroller_test", clocktesting.NewFakePassiveClock(time.Now())))) - if err != nil && len(scenario.errors) == 0 { + if err != nil && len(scenario.errors) == 0 && scenario.podListErr == nil { t.Fatal(err) } @@ -726,12 +1486,18 @@ func TestUpdateOperatorStatus(t *testing.T) { if err != nil { t.Fatal(err) } + if scenario.validateVersionRecorder != nil { + if err := scenario.validateVersionRecorder(recorder); err != nil { + t.Fatal(err) + } + } }) } } type fakePodLister struct { pods []*corev1.Pod + err error } type fakePodNamespaceLister struct { @@ -739,7 +1505,7 @@ type fakePodNamespaceLister struct { } func (f *fakePodNamespaceLister) List(selector labels.Selector) (ret []*corev1.Pod, err error) { - return f.lister.pods, nil + return f.lister.pods, f.lister.err } func (f *fakePodNamespaceLister) Get(name string) (*corev1.Pod, error) { @@ -747,7 +1513,7 @@ func (f *fakePodNamespaceLister) Get(name string) (*corev1.Pod, error) { } func (f *fakePodLister) List(selector labels.Selector) (ret []*corev1.Pod, err error) { - return f.pods, nil + return f.pods, f.err } func (f *fakePodLister) Pods(namespace string) corev1listers.PodNamespaceLister { @@ -756,6 +1522,37 @@ func (f *fakePodLister) Pods(namespace string) corev1listers.PodNamespaceLister } } +type setVersionCall struct { + OperandName, Version string +} + +type fakeVersionRecorder struct { + setVersionCalls []setVersionCall +} + +func (f *fakeVersionRecorder) SetVersion(operandName, version string) { + f.setVersionCalls = append(f.setVersionCalls, setVersionCall{operandName, version}) +} + +func (f *fakeVersionRecorder) UnsetVersion(_ string) {} +func (f 
+type setVersionCall struct {
+	OperandName, Version string
+}
+
+type fakeVersionRecorder struct {
+	setVersionCalls []setVersionCall
+}
+
+func (f *fakeVersionRecorder) SetVersion(operandName, version string) {
+	f.setVersionCalls = append(f.setVersionCalls, setVersionCall{operandName, version})
+}
+
+func (f *fakeVersionRecorder) UnsetVersion(_ string)                  {}
+func (f *fakeVersionRecorder) GetVersions() map[string]string         { return nil }
+func (f *fakeVersionRecorder) VersionChangedChannel() <-chan struct{} { return nil }
+
+func expectVersionRecorded(r *fakeVersionRecorder) error {
+	expected := []setVersionCall{{OperandName: "apiserver", Version: "v1.0.0-test"}}
+	if d := cmp.Diff(expected, r.setVersionCalls); d != "" {
+		return fmt.Errorf("unexpected SetVersion calls (-want +got):\n%s", d)
+	}
+	return nil
+}
+
+func expectVersionNotRecorded(r *fakeVersionRecorder) error {
+	if d := cmp.Diff([]setVersionCall(nil), r.setVersionCalls); d != "" {
+		return fmt.Errorf("unexpected SetVersion calls (-want +got):\n%s", d)
+	}
+	return nil
+}
+
 func areCondidtionsEqual(expectedConditions []operatorv1.OperatorCondition, actualConditions []operatorv1.OperatorCondition) error {
 	if len(expectedConditions) != len(actualConditions) {
 		return fmt.Errorf("expected %d conditions but got %d", len(expectedConditions), len(actualConditions))