From c44884edb84cbf0bb4600ff0512a5a0dd01f2a51 Mon Sep 17 00:00:00 2001
From: Thomas Jungblut
Date: Mon, 4 May 2026 09:17:03 +0200
Subject: [PATCH] OCPBUGS-62629: reflect workload controller changes in the
 deployment controller

Contains the degraded/progressing condition changes made to the workload
controller from #2128.

Signed-off-by: Thomas Jungblut
---
 pkg/apps/deployment/status.go                  |  19 +-
 ...si_driver_controller_service_controller.go  |   2 +-
 .../deployment_controller.go                   | 228 ++++++++++++++----
 .../deployment_controller_test.go              |  98 +++++++-
 .../deploymentcontroller/helpers_test.go       |  12 +
 5 files changed, 300 insertions(+), 59 deletions(-)

diff --git a/pkg/apps/deployment/status.go b/pkg/apps/deployment/status.go
index 164ef87ea6..5b2f7c450e 100644
--- a/pkg/apps/deployment/status.go
+++ b/pkg/apps/deployment/status.go
@@ -16,6 +16,20 @@ func PodContainersStatus(deployment *appsv1.Deployment, podClient corelistersv1.
 	if err != nil {
 		return nil, err
 	}
+	return ContainerMessagesForPods(deployment, deploymentPods), nil
+}
+
+// ContainerMessagesForPods returns human-readable container status messages for the given pods.
+// If pods is empty, a single message is included describing that no pods matched (using the deployment template labels).
+func ContainerMessagesForPods(deployment *appsv1.Deployment, pods []*corev1.Pod) []string {
+	containerStates := containerStatusMessagesForPods(pods)
+	if len(pods) == 0 {
+		containerStates = append(containerStates, fmt.Sprintf("no pods found with labels %q", labels.SelectorFromSet(deployment.Spec.Template.Labels).String()))
+	}
+	return containerStates
+}
+
+func containerStatusMessagesForPods(deploymentPods []*corev1.Pod) []string {
 	containerStates := []string{}
 
 	for i := range deploymentPods {
@@ -66,10 +80,7 @@ func PodContainersStatus(deployment *appsv1.Deployment, podClient corelistersv1.
 		}
 	}
 
-	if len(deploymentPods) == 0 {
-		containerStates = append(containerStates, fmt.Sprintf("no pods found with labels %q", labels.SelectorFromSet(deployment.Spec.Template.Labels).String()))
-	}
-	return containerStates, nil
+	return containerStates
 }
 
 func containerPlural(c int, crashloop bool) string {
diff --git a/pkg/operator/csi/csidrivercontrollerservicecontroller/csi_driver_controller_service_controller.go b/pkg/operator/csi/csidrivercontrollerservicecontroller/csi_driver_controller_service_controller.go
index c89caa5800..0830e67084 100644
--- a/pkg/operator/csi/csidrivercontrollerservicecontroller/csi_driver_controller_service_controller.go
+++ b/pkg/operator/csi/csidrivercontrollerservicecontroller/csi_driver_controller_service_controller.go
@@ -61,7 +61,7 @@ import (
 //
 // Available: indicates that the CSI Controller Service was successfully deployed and at least one Deployment replica is available.
 // Progressing: indicates that the CSI Controller Service is being deployed.
-// Degraded: produced when the sync() method returns an error.
+// Degraded: true when the deployment has timed out progressing, when failing pods reduce availability (while not mid-rollout), or when sync returns an error.
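+// ("Mid-rollout" here means the underlying Deployment's Progressing condition does not yet report NewReplicaSetAvailable and has not exceeded its progress deadline.)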
 func NewCSIDriverControllerServiceController(
 	name string,
diff --git a/pkg/operator/deploymentcontroller/deployment_controller.go b/pkg/operator/deploymentcontroller/deployment_controller.go
index 022b0278b3..75392413a0 100644
--- a/pkg/operator/deploymentcontroller/deployment_controller.go
+++ b/pkg/operator/deploymentcontroller/deployment_controller.go
@@ -9,6 +9,7 @@ import (
 
 	opv1 "github.com/openshift/api/operator/v1"
 	applyoperatorv1 "github.com/openshift/client-go/operator/applyconfigurations/operator/v1"
+	dpm "github.com/openshift/library-go/pkg/apps/deployment"
 	"github.com/openshift/library-go/pkg/controller/factory"
 	"github.com/openshift/library-go/pkg/operator/events"
 	"github.com/openshift/library-go/pkg/operator/management"
@@ -42,7 +43,7 @@ type ManifestHookFunc func(*opv1.OperatorSpec, []byte) ([]byte, error)
 // This controller optionally produces the following conditions:
 // Available: indicates that the deployment controller was successfully deployed and at least one Deployment replica is available.
 // Progressing: indicates that the Deployment is in progress.
-// Degraded: produced when the sync() method returns an error.
+// Degraded: true when the deployment has timed out progressing, when failing pods reduce availability (while not mid-rollout), or when sync returns an error.
 type DeploymentController struct {
 	// instanceName is the name to identify what instance this belongs too: FooDriver for instance
 	instanceName string
@@ -193,7 +194,9 @@ func (c *DeploymentController) ToController() (factory.Controller, error) {
 	).ResyncEvery(
 		time.Minute,
 	)
-	if slices.Contains(c.conditions, opv1.OperatorStatusTypeDegraded) {
+	// When this controller owns Degraded in status, do not use WithSyncDegradedOnError: reconcile would set
+	// Degraded=False on every successful sync and clear deployment operand degradation (see openshift/library-go#2128).
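+	// For example, a controller that lists Degraded in c.conditions gets Degraded=True only from syncManaged
+	// (ProgressDeadlineExceeded / UnavailablePod) or from applySyncErrorDegraded when sync itself fails.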
+	if !slices.Contains(c.conditions, opv1.OperatorStatusTypeDegraded) {
 		controller = controller.WithSyncDegradedOnError(c.operatorClient)
 	}
 	return controller.ToController(
@@ -207,7 +210,18 @@ func (c *DeploymentController) Name() string {
 	return c.instanceName
 }
 
-func (c *DeploymentController) sync(ctx context.Context, syncContext factory.SyncContext) error {
+func (c *DeploymentController) sync(ctx context.Context, syncContext factory.SyncContext) (err error) {
+	if slices.Contains(c.conditions, opv1.OperatorStatusTypeDegraded) {
+		defer func() {
+			if err != nil {
+				applyErr := c.applySyncErrorDegraded(ctx, err)
+				if applyErr != nil {
+					klog.V(2).Infof("failed to apply sync error degraded status: %v", applyErr)
+				}
+			}
+		}()
+	}
+
 	opSpec, opStatus, _, err := c.operatorClient.GetOperatorState()
 	if apierrors.IsNotFound(err) && management.IsOperatorRemovable() {
 		return nil
@@ -217,7 +231,7 @@ func (c *DeploymentController) sync(ctx context.Context, syncContext factory.Syn
 	}
 
 	if opSpec.ManagementState == opv1.Removed && management.IsOperatorRemovable() {
-		return c.syncDeleting(ctx, opSpec, opStatus, syncContext)
+		return c.syncDeleting(ctx, opSpec, syncContext)
 	}
 
 	if opSpec.ManagementState != opv1.Managed {
@@ -229,11 +243,21 @@ func (c *DeploymentController) sync(ctx context.Context, syncContext factory.Syn
 		return err
 	}
 	if management.IsOperatorRemovable() && meta.DeletionTimestamp != nil {
-		return c.syncDeleting(ctx, opSpec, opStatus, syncContext)
+		return c.syncDeleting(ctx, opSpec, syncContext)
 	}
 	return c.syncManaged(ctx, opSpec, opStatus, syncContext)
 }
 
+func (c *DeploymentController) applySyncErrorDegraded(ctx context.Context, syncErr error) error {
+	degraded := applyoperatorv1.OperatorCondition().
+		WithType(c.instanceName + opv1.OperatorStatusTypeDegraded).
+		WithStatus(opv1.ConditionTrue).
+		WithReason("SyncError").
+		WithMessage(syncErr.Error())
+	status := applyoperatorv1.OperatorStatus().WithConditions(degraded)
+	return c.operatorClient.ApplyOperatorStatus(ctx, c.controllerInstanceName, status)
+}
+
 func (c *DeploymentController) syncManaged(ctx context.Context, opSpec *opv1.OperatorSpec, opStatus *opv1.OperatorStatus, syncContext factory.SyncContext) error {
 	klog.V(4).Infof("syncManaged")
 
@@ -257,7 +281,7 @@ func (c *DeploymentController) syncManaged(ctx context.Context, opSpec *opv1.Ope
 	if err != nil {
 		return err
 	}
-	// Create an OperatorStatusApplyConfiguration with generations
+
 	status := applyoperatorv1.OperatorStatus().
 		WithGenerations(&applyoperatorv1.GenerationStatusApplyConfiguration{
 			Group:          ptr.To("apps"),
@@ -267,7 +291,8 @@ func (c *DeploymentController) syncManaged(ctx context.Context, opSpec *opv1.Ope
 			LastGeneration: ptr.To(deployment.Generation),
 		})
 
-	// Set Available condition
+	now := time.Now()
+
 	if slices.Contains(c.conditions, opv1.OperatorStatusTypeAvailable) {
 		availableCondition := applyoperatorv1.
 			OperatorCondition().WithType(c.instanceName + opv1.OperatorStatusTypeAvailable)
@@ -276,17 +301,25 @@ func (c *DeploymentController) syncManaged(ctx context.Context, opSpec *opv1.Ope
 				WithStatus(opv1.ConditionTrue).
 				WithMessage("Deployment is available").
 				WithReason("AsExpected")
-
 		} else {
 			availableCondition = availableCondition.
 				WithStatus(opv1.ConditionFalse).
-				WithMessage("Waiting for Deployment").
-				WithReason("Deploying")
+				WithReason("NoPod").
+				WithMessage(fmt.Sprintf("no %s.%s pods available on any node", deployment.Name, deployment.Namespace))
 		}
 		status = status.WithConditions(availableCondition)
 	}
 
-	// Set Progressing condition
+	desiredReplicas := ptr.Deref(deployment.Spec.Replicas, 1)
+
+	progressTimedOutMessage, workloadIsBeingUpdatedTooLong := hasDeploymentTimedOutProgressing(deployment.Status)
+	workloadIsBeingUpdated := !hasDeploymentProgressed(deployment.Status) && !workloadIsBeingUpdatedTooLong
+
+	var progressDeadlineExceededMessage string
+	if workloadIsBeingUpdatedTooLong {
+		progressDeadlineExceededMessage = fmt.Sprintf("deployment/%s.%s has timed out progressing: %s", deployment.Name, deployment.Namespace, progressTimedOutMessage)
+	}
+
 	if slices.Contains(c.conditions, opv1.OperatorStatusTypeProgressing) {
 		progressingCondition := applyoperatorv1.OperatorCondition().
 			WithType(c.instanceName + opv1.OperatorStatusTypeProgressing).
@@ -294,20 +327,66 @@
 			WithMessage("Deployment is not progressing").
 			WithReason("AsExpected")
 
-		if ok, msg := isProgressing(deployment); ok {
+		switch {
+		case workloadIsBeingUpdated:
 			progressingCondition = progressingCondition.
 				WithStatus(opv1.ConditionTrue).
-				WithMessage(msg).
-				WithReason("Deploying")
+				WithReason("PodsUpdating").
+				WithMessage(fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest revision and %d/%d pods are available", deployment.Name, deployment.Namespace, deployment.Status.UpdatedReplicas, desiredReplicas, deployment.Status.AvailableReplicas, desiredReplicas))
+		case workloadIsBeingUpdatedTooLong:
+			progressingCondition = progressingCondition.
+				WithStatus(opv1.ConditionFalse).
+				WithReason("ProgressDeadlineExceeded").
+				WithMessage(progressDeadlineExceededMessage)
+		default:
+			progressingCondition = progressingCondition.
+				WithStatus(opv1.ConditionFalse).
+				WithReason("AsExpected")
+		}
+		status = status.WithConditions(progressingCondition)
+	}
+
+	if slices.Contains(c.conditions, opv1.OperatorStatusTypeDegraded) {
+		degradedCondition := applyoperatorv1.OperatorCondition().
+			WithType(c.instanceName + opv1.OperatorStatusTypeDegraded).
+			WithStatus(opv1.ConditionFalse).
+			WithReason("AsExpected")
 
-			// Degrade when operator is progressing too long.
-			// Only do this if we would continue to be in the Progressing state, otherwise, we'll never get out
-			if v1helpers.IsUpdatingTooLong(opStatus, c.instanceName+opv1.OperatorStatusTypeProgressing) {
-				return fmt.Errorf("Deployment was progressing too long")
+		switch {
+		case workloadIsBeingUpdatedTooLong:
+			degradedCondition = degradedCondition.
+				WithStatus(opv1.ConditionTrue).
+				WithReason("ProgressDeadlineExceeded").
+				WithMessage(progressDeadlineExceededMessage)
+
+		case !workloadIsBeingUpdated && deployment.Status.AvailableReplicas < desiredReplicas:
+			operandPods := c.listOperandPodsForDiagnostics(ctx, deployment, syncContext)
+			livePods := nonDeletingPods(operandPods)
+			hasFailing := hasFailingPods(deployment, livePods, now)
+			if hasFailing || deployment.Status.AvailableReplicas == 0 {
+				containerMessages := dpm.ContainerMessagesForPods(deployment, livePods)
+				var failureDescription string
+				if len(containerMessages) > 0 {
+					failureDescription = ` (` + strings.Join(containerMessages, ", ") + `)`
+				}
+				numUnavailable := desiredReplicas - deployment.Status.AvailableReplicas
+				message := fmt.Sprintf("%d of %d requested instances are unavailable for %s.%s%s", numUnavailable, desiredReplicas, deployment.Name, deployment.Namespace, failureDescription)
+				degradedCondition = degradedCondition.
+					WithStatus(opv1.ConditionTrue).
+					WithReason("UnavailablePod").
+					WithMessage(message)
+			} else {
+				degradedCondition = degradedCondition.
+					WithStatus(opv1.ConditionFalse).
+					WithReason("AsExpected")
 			}
-		}
-		status = status.WithConditions(progressingCondition)
+		default:
+			degradedCondition = degradedCondition.
+				WithStatus(opv1.ConditionFalse).
+				WithReason("AsExpected")
+		}
+		status = status.WithConditions(degradedCondition)
 	}
 
 	return c.operatorClient.ApplyOperatorStatus(
@@ -317,7 +396,34 @@ func (c *DeploymentController) syncManaged(ctx context.Context, opSpec *opv1.Ope
 	)
 }
 
-func (c *DeploymentController) syncDeleting(ctx context.Context, opSpec *opv1.OperatorSpec, opStatus *opv1.OperatorStatus, syncContext factory.SyncContext) error {
+// listOperandPodsForDiagnostics lists pods matched by the deployment selector for UnavailablePod diagnostics.
+// A nil selector, selector conversion errors, or API list errors are logged and recorded as warnings; no error is returned.
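+// Callers pass the result through nonDeletingPods and dpm.ContainerMessagesForPods; an empty result is not fatal and
+// simply yields the generic "no pods found with labels ..." diagnostic in the Degraded message.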
+func (c *DeploymentController) listOperandPodsForDiagnostics(ctx context.Context, deploymentObj *appsv1.Deployment, syncContext factory.SyncContext) []*corev1.Pod {
+	if deploymentObj.Spec.Selector == nil {
+		klog.Warningf("deployment/%s/%s has no spec.selector, skipping pod diagnostics", deploymentObj.Namespace, deploymentObj.Name)
+		syncContext.Recorder().Warningf("DeploymentSelectorMissing", "deployment %s/%s has no spec.selector, skipping pod diagnostics", deploymentObj.Namespace, deploymentObj.Name)
+		return nil
+	}
+	selector, err := metav1.LabelSelectorAsSelector(deploymentObj.Spec.Selector)
+	if err != nil {
+		klog.Warningf("deployment/%s/%s has invalid spec.selector: %v", deploymentObj.Namespace, deploymentObj.Name, err)
+		syncContext.Recorder().Warningf("DeploymentSelectorInvalid", "deployment %s/%s has invalid spec.selector: %v", deploymentObj.Namespace, deploymentObj.Name, err)
+		return nil
+	}
+	podList, err := c.kubeClient.CoreV1().Pods(deploymentObj.Namespace).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
+	if err != nil {
+		klog.Warningf("deployment/%s/%s: pod list for diagnostics failed: %v", deploymentObj.Namespace, deploymentObj.Name, err)
+		syncContext.Recorder().Warningf("PodListFailed", "listing pods for deployment %s/%s diagnostics: %v", deploymentObj.Namespace, deploymentObj.Name, err)
+		return nil
+	}
+	out := make([]*corev1.Pod, len(podList.Items))
+	for i := range podList.Items {
+		out[i] = &podList.Items[i]
+	}
+	return out
+}
+
+func (c *DeploymentController) syncDeleting(ctx context.Context, opSpec *opv1.OperatorSpec, syncContext factory.SyncContext) error {
 	klog.V(4).Infof("syncDeleting")
 	required, err := c.getDeployment(opSpec)
 	if err != nil {
@@ -356,36 +462,70 @@ func (c *DeploymentController) getDeployment(opSpec *opv1.OperatorSpec) (*appsv1
 	return required, nil
 }
 
-func isProgressing(deployment *appsv1.Deployment) (bool, string) {
+// hasDeploymentProgressed returns true if the deployment reports NewReplicaSetAvailable
+// via the DeploymentProgressing condition.
+func hasDeploymentProgressed(status appsv1.DeploymentStatus) bool {
+	for _, cond := range status.Conditions {
+		if cond.Type == appsv1.DeploymentProgressing {
+			return cond.Status == corev1.ConditionTrue && cond.Reason == "NewReplicaSetAvailable"
+		}
+	}
+	return false
+}
 
-	var deploymentExpectedReplicas int32
-	if deployment.Spec.Replicas != nil {
-		deploymentExpectedReplicas = *deployment.Spec.Replicas
+// hasDeploymentTimedOutProgressing returns true if the deployment reports ProgressDeadlineExceeded.
+// The function returns the Progressing condition message as the first return value.
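+// A Deployment that has timed out typically carries a condition like (illustrative values):
+//   {Type: Progressing, Status: "False", Reason: "ProgressDeadlineExceeded", Message: "ReplicaSet foo-12345 has timed out progressing."}
+// in which case this function returns that message and true.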
+func hasDeploymentTimedOutProgressing(status appsv1.DeploymentStatus) (string, bool) {
+	for _, cond := range status.Conditions {
+		if cond.Type == appsv1.DeploymentProgressing {
+			return cond.Message, cond.Status == corev1.ConditionFalse && cond.Reason == "ProgressDeadlineExceeded"
+		}
 	}
+	return "", false
+}
 
-	switch {
-	case deployment.Generation != deployment.Status.ObservedGeneration:
-		return true, "Waiting for Deployment to act on changes"
-	case hasFinishedProgressing(deployment):
-		return false, ""
-	case deployment.Status.UnavailableReplicas > 0:
-		return true, "Waiting for Deployment to deploy pods"
-	case deployment.Status.UpdatedReplicas < deploymentExpectedReplicas:
-		return true, "Waiting for Deployment to update pods"
-	case deployment.Status.AvailableReplicas < deploymentExpectedReplicas:
-		return true, "Waiting for Deployment to deploy pods"
+// nonDeletingPods returns pods that are not terminating (no deletion timestamp).
+func nonDeletingPods(pods []*corev1.Pod) []*corev1.Pod {
+	out := make([]*corev1.Pod, 0, len(pods))
+	for _, p := range pods {
+		if p != nil && p.DeletionTimestamp == nil {
+			out = append(out, p)
+		}
 	}
-	return false, ""
+	return out
 }
 
-func hasFinishedProgressing(deployment *appsv1.Deployment) bool {
-	// Deployment whose rollout is complete gets Progressing condition with Reason NewReplicaSetAvailable condition.
-	// https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#complete-deployment
-	// Any subsequent missing replicas (e.g. caused by a node reboot) must not not change the Progressing condition.
-	for _, cond := range deployment.Status.Conditions {
-		if cond.Type == appsv1.DeploymentProgressing {
-			return cond.Status == corev1.ConditionTrue && cond.Reason == "NewReplicaSetAvailable"
+func hasFailingPods(workload *appsv1.Deployment, pods []*corev1.Pod, now time.Time) bool {
+	progressDeadline := time.Duration(ptr.Deref(workload.Spec.ProgressDeadlineSeconds, 600)) * time.Second
+	minReady := time.Duration(workload.Spec.MinReadySeconds) * time.Second
+
+	for _, pod := range pods {
+		if pod.DeletionTimestamp != nil {
+			continue
+		}
+
+		readyCond := findPodReadyCondition(pod)
+		deadline := pod.CreationTimestamp.Time.Add(progressDeadline)
+
+		if (readyCond == nil || readyCond.Status != corev1.ConditionTrue) && now.After(deadline) {
+			return true
+		}
+
+		if minReady > 0 && readyCond != nil && readyCond.Status == corev1.ConditionTrue {
+			isRelevant := now.After(pod.CreationTimestamp.Time.Add(progressDeadline + minReady))
+			if isRelevant && now.Sub(readyCond.LastTransitionTime.Time) < minReady {
+				return true
+			}
 		}
 	}
 	return false
 }
+
+func findPodReadyCondition(pod *corev1.Pod) *corev1.PodCondition {
+	for i := range pod.Status.Conditions {
+		if pod.Status.Conditions[i].Type == corev1.PodReady {
+			return &pod.Status.Conditions[i]
+		}
+	}
+	return nil
+}
diff --git a/pkg/operator/deploymentcontroller/deployment_controller_test.go b/pkg/operator/deploymentcontroller/deployment_controller_test.go
index 01e44c2ed2..72b4efac9a 100644
--- a/pkg/operator/deploymentcontroller/deployment_controller_test.go
+++ b/pkg/operator/deploymentcontroller/deployment_controller_test.go
@@ -2,8 +2,10 @@ package deploymentcontroller
 
 import (
 	"context"
+	"fmt"
 	"os"
 	"sort"
+	"strings"
 	"time"
 
 	"testing"
@@ -46,6 +48,7 @@ const (
 var (
 	conditionAvailable   = controllerName + opv1.OperatorStatusTypeAvailable
 	conditionProgressing = controllerName + opv1.OperatorStatusTypeProgressing
+	conditionDegraded    = controllerName + opv1.OperatorStatusTypeDegraded
 )
 
 type testCase struct {
@@ -54,6 +57,9 @@ type testCase struct {
 	name            string
 	image           string
 	initialObjects  testObjects
 	expectedObjects testObjects
 	expectErr       bool
+	// assertOperatorStatus inspects operator status before sanitizeInstanceStatus strips Reason/Message.
+	assertOperatorStatus  func(t *testing.T, status *opv1.OperatorStatus)
+	optionalManifestHooks []ManifestHookFunc
 }
 
 type testObjects struct {
@@ -400,7 +406,7 @@ func newTestContext(test testCase, t *testing.T) *testContext {
 		nil, /*triggerErr func*/
 	)
 	optionalInformers := []factory.Informer{configInformer}
-	var optionalManifestHookFuncs []ManifestHookFunc
+	optionalManifestHookFuncs := test.optionalManifestHooks
 	controller := NewDeploymentController(
 		controllerName,
 		makeFakeManifest(),
@@ -470,7 +476,7 @@ func TestSync(t *testing.T) {
 				operator: makeFakeOperatorInstance(
 					withGenerations(1),
 					withTrueConditions(conditionProgressing),
-					withFalseConditions(conditionAvailable),
+					withFalseConditions(conditionAvailable, conditionDegraded),
 					withFinalizers(finalizerName),
 				),
 			},
@@ -526,7 +532,7 @@ func TestSync(t *testing.T) {
 					// withStatus(replica0),
 					withGenerations(1),
 					withTrueConditions(conditionProgressing),
-					withFalseConditions(conditionAvailable)), // Degraded is set later on
+					withFalseConditions(conditionAvailable, conditionDegraded)),
 			},
 		},
 		{
@@ -548,7 +554,7 @@ func TestSync(t *testing.T) {
 					// withStatus(replica1),
 					withGenerations(1),
 					withTrueConditions(conditionAvailable),
-					withFalseConditions(conditionProgressing)),
+					withFalseConditions(conditionProgressing, conditionDegraded)),
 			},
 		},
 		{
@@ -569,8 +575,9 @@ func TestSync(t *testing.T) {
 				operator: makeFakeOperatorInstance(
 					// withStatus(replica1),
 					withGenerations(1),
-					withFalseConditions(conditionAvailable),    // No pod is running
-					withFalseConditions(conditionProgressing)), // Despite missing pod, the operator is not progressing
+					withFalseConditions(conditionAvailable),   // No pod is running
+					withFalseConditions(conditionProgressing), // Despite missing pod, the operator is not progressing
+					withTrueConditions(conditionDegraded)),    // Operand unavailable while not mid-rollout
 			},
 		},
 		{
@@ -590,8 +597,8 @@ func TestSync(t *testing.T) {
 				operator: makeFakeOperatorInstance(
 					// withStatus(replica1),
 					withGenerations(1),
-					withFalseConditions(conditionAvailable),  // No pod is running
-					withTrueConditions(conditionProgressing)), // A pod is missing, the operator is progressing
+					withFalseConditions(conditionAvailable, conditionDegraded), // No pod is running
+					withTrueConditions(conditionProgressing)),                  // A pod is missing, the operator is progressing
 			},
 		},
 		{
@@ -613,6 +620,7 @@ func TestSync(t *testing.T) {
 					// withStatus(replica1),
 					withGenerations(3), // now the operator knows generation 1
 					withTrueConditions(conditionAvailable, conditionProgressing), // Progressing due to Generation change
+					withFalseConditions(conditionDegraded),
 				),
 			},
 		},
@@ -637,7 +645,7 @@ func TestSync(t *testing.T) {
 					// withStatus(replica0),
 					withGenerations(1),
 					withTrueConditions(conditionProgressing), // The operator is Progressing
-					withFalseConditions(conditionAvailable)), // The operator is not Available (controller not running...)
+					withFalseConditions(conditionAvailable, conditionDegraded)),
 			},
 		},
 		{
@@ -660,7 +668,74 @@ func TestSync(t *testing.T) {
 				operator: makeFakeOperatorInstance(
 					// withStatus(replica0),
 					withGenerations(1),
-					withTrueConditions(conditionAvailable, conditionProgressing)), // The operator is Progressing, but still Available
+					withTrueConditions(conditionAvailable, conditionProgressing), // The operator is Progressing, but still Available
+					withFalseConditions(conditionDegraded),
+				),
 			},
 		},
+		{
+			name: "operator conditions expose ProgressDeadlineExceeded reason and message",
+			initialObjects: testObjects{
+				deployment: makeDeployment(
+					withDeploymentGeneration(1, 1),
+					withDeploymentStatus(replica0, replica0, replica0),
+					withDeploymentConditionWithMessage(appsv1.DeploymentProgressing, "ProgressDeadlineExceeded", corev1.ConditionFalse, "ReplicaSet timed out")),
+				operator: makeFakeOperatorInstance(withGenerations(1)),
+			},
+			expectedObjects: testObjects{
+				deployment: makeDeployment(
+					withDeploymentGeneration(1, 1),
+					withDeploymentStatus(replica0, replica0, replica0),
+					withDeploymentConditionWithMessage(appsv1.DeploymentProgressing, "ProgressDeadlineExceeded", corev1.ConditionFalse, "ReplicaSet timed out")),
+				operator: makeFakeOperatorInstance(
+					withGenerations(1),
+					withFalseConditions(conditionAvailable, conditionProgressing),
+					withTrueConditions(conditionDegraded),
+				),
+			},
+			assertOperatorStatus: func(t *testing.T, status *opv1.OperatorStatus) {
+				wantMsg := "deployment/dummy-deployment.openshift-dummy-test-deployment has timed out progressing: ReplicaSet timed out"
+				deg := v1helpers.FindOperatorCondition(status.Conditions, conditionDegraded)
+				if deg == nil {
+					t.Fatalf("missing %s", conditionDegraded)
+				}
+				if deg.Status != opv1.ConditionTrue || deg.Reason != "ProgressDeadlineExceeded" || deg.Message != wantMsg {
+					t.Fatalf("Degraded: got status=%s reason=%q message=%q, want True ProgressDeadlineExceeded %q", deg.Status, deg.Reason, deg.Message, wantMsg)
+				}
+				prog := v1helpers.FindOperatorCondition(status.Conditions, conditionProgressing)
+				if prog == nil {
+					t.Fatalf("missing %s", conditionProgressing)
+				}
+				if prog.Status != opv1.ConditionFalse || prog.Reason != "ProgressDeadlineExceeded" || prog.Message != wantMsg {
+					t.Fatalf("Progressing: got status=%s reason=%q message=%q, want False ProgressDeadlineExceeded %q", prog.Status, prog.Reason, prog.Message, wantMsg)
+				}
+			},
+		},
+		{
+			name:      "sync error applies SyncError degraded via defer when manifest hook fails",
+			expectErr: true,
+			initialObjects: testObjects{
+				operator: makeFakeOperatorInstance(),
+			},
+			expectedObjects: testObjects{
+				operator: makeFakeOperatorInstance(withTrueConditions(conditionDegraded)),
+			},
+			optionalManifestHooks: []ManifestHookFunc{
+				func(_ *opv1.OperatorSpec, _ []byte) ([]byte, error) {
+					return nil, fmt.Errorf("forced manifest hook error for SyncError degraded regression")
+				},
+			},
+			assertOperatorStatus: func(t *testing.T, status *opv1.OperatorStatus) {
+				deg := v1helpers.FindOperatorCondition(status.Conditions, conditionDegraded)
+				if deg == nil {
+					t.Fatalf("missing %s after sync error (applySyncErrorDegraded / defer path)", conditionDegraded)
+				}
+				if deg.Status != opv1.ConditionTrue || deg.Reason != "SyncError" {
+					t.Fatalf("Degraded: got status=%s reason=%q, want True SyncError (deferred applySyncErrorDegraded while WithSyncDegradedOnError is off)", deg.Status, deg.Reason)
+				}
+				if !strings.Contains(deg.Message, "error running hook function") || !strings.Contains(deg.Message, "forced manifest hook error for SyncError degraded regression") {
+					t.Fatalf("Degraded.Message: got %q; expected wrapped hook error from getDeployment()", deg.Message)
+				}
+			},
+		},
 	}
@@ -718,6 +793,9 @@ func TestSync(t *testing.T) {
 			if err != nil {
 				t.Errorf("Failed to get operator: %v", err)
 			}
+			if test.assertOperatorStatus != nil {
+				test.assertOperatorStatus(t, actualStatus)
+			}
 			sanitizeInstanceStatus(actualStatus)
 			sanitizeInstanceStatus(&test.expectedObjects.operator.Status)
 			if !equality.Semantic.DeepEqual(test.expectedObjects.operator.Status, *actualStatus) {
diff --git a/pkg/operator/deploymentcontroller/helpers_test.go b/pkg/operator/deploymentcontroller/helpers_test.go
index 16966266ee..97e74bc09d 100644
--- a/pkg/operator/deploymentcontroller/helpers_test.go
+++ b/pkg/operator/deploymentcontroller/helpers_test.go
@@ -77,6 +77,18 @@ func withDeploymentConditions(conditionType appsv1.DeploymentConditionType, reas
 	}
 }
 
+func withDeploymentConditionWithMessage(conditionType appsv1.DeploymentConditionType, reason string, status v1.ConditionStatus, message string) deploymentModifier {
+	return func(instance *appsv1.Deployment) *appsv1.Deployment {
+		instance.Status.Conditions = append(instance.Status.Conditions, appsv1.DeploymentCondition{
+			Type:    conditionType,
+			Status:  status,
+			Reason:  reason,
+			Message: message,
+		})
+		return instance
+	}
+}
+
 func TestWithReplicasHook(t *testing.T) {
 	var (
 		masterNodeLabels = map[string]string{"node-role.kubernetes.io/master": ""}