From 6584b18766508e885c52da91ec63391fe2018e94 Mon Sep 17 00:00:00 2001 From: Wantong Date: Mon, 12 Jan 2026 18:10:46 -0800 Subject: [PATCH 1/5] fix: fix syncRoleBinding to avoid redundant roleBinding update (#411) --- .../v1beta1/membercluster_controller.go | 8 ++++ .../v1beta1/membercluster_controller_test.go | 39 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/pkg/controllers/membercluster/v1beta1/membercluster_controller.go b/pkg/controllers/membercluster/v1beta1/membercluster_controller.go index 0a3e99de6..35c61c972 100644 --- a/pkg/controllers/membercluster/v1beta1/membercluster_controller.go +++ b/pkg/controllers/membercluster/v1beta1/membercluster_controller.go @@ -426,6 +426,14 @@ func (r *Reconciler) syncRoleBinding(ctx context.Context, mc *clusterv1beta1.Mem Name: roleName, }, } + // For User and Group kind, the APIGroup is defaulted to rbac.authorization.k8s.io if not set. + // Reference: https://pkg.go.dev/k8s.io/api/rbac/v1#Subject + for i := range expectedRoleBinding.Subjects { + subj := &expectedRoleBinding.Subjects[i] + if subj.APIGroup == "" && (subj.Kind == rbacv1.GroupKind || subj.Kind == rbacv1.UserKind) { + subj.APIGroup = rbacv1.GroupName + } + } // Creates role binding if not found. var currentRoleBinding rbacv1.RoleBinding diff --git a/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go b/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go index 31c1dd249..274b9ccf0 100644 --- a/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go +++ b/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go @@ -355,6 +355,11 @@ func TestSyncRole(t *testing.T) { func TestSyncRoleBinding(t *testing.T) { identity := rbacv1.Subject{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "User", + Name: "MemberClusterIdentity", + } + identityWithoutGroup := rbacv1.Subject{ Kind: "User", Name: "MemberClusterIdentity", } @@ -428,6 +433,40 @@ func TestSyncRoleBinding(t *testing.T) { roleName: "fleet-role-mc1", wantedError: "", }, + "identity without APIGroup should not trigger roleBinding reconcile": { + r: &Reconciler{ + Client: &test.MockClient{ + MockGet: func(ctx context.Context, key client.ObjectKey, obj client.Object) error { + roleRef := rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: "fleet-role-mc1", + } + o := obj.(*rbacv1.RoleBinding) + *o = rbacv1.RoleBinding{ + TypeMeta: metav1.TypeMeta{ + Kind: "RoleBinding", + APIVersion: rbacv1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "fleet-rolebinding-mc1", + Namespace: namespace1, + }, + Subjects: []rbacv1.Subject{identity}, // Returned roleBinding has APIGroup set. 
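Why the APIGroup defaulting matters for this fix: a subject built from the MemberCluster identity with an empty APIGroup never deep-equals the subject the API server returns with the group defaulted, so every sync would see a spurious diff and issue a redundant RoleBinding update. A minimal standalone sketch of the mismatch (illustration only, not part of the patch; the controller's own comparison may differ in detail):

    package main

    import (
        "fmt"

        rbacv1 "k8s.io/api/rbac/v1"
        "k8s.io/apimachinery/pkg/api/equality"
    )

    func main() {
        // Subject as written in the MemberCluster spec, with APIGroup left empty.
        fromSpec := rbacv1.Subject{Kind: rbacv1.UserKind, Name: "MemberClusterIdentity"}
        // Subject as returned by the API server, with the APIGroup defaulted.
        fromServer := rbacv1.Subject{APIGroup: rbacv1.GroupName, Kind: rbacv1.UserKind, Name: "MemberClusterIdentity"}

        // Prints false: without the normalization added above, the expected and current
        // role bindings never match, triggering an update on every reconcile.
        fmt.Println(equality.Semantic.DeepEqual(fromSpec, fromServer))
    }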
+ RoleRef: roleRef, + } + return nil + }, + }, + }, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "mc1"}, + Spec: clusterv1beta1.MemberClusterSpec{Identity: identityWithoutGroup}, + }, + namespaceName: namespace1, + roleName: "fleet-role-mc1", + wantedError: "", + }, "role binding but with diff": { r: &Reconciler{ Client: &test.MockClient{ From 63fca1229d9f2b932db7a28759711c7c166dcffc Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 14 Jan 2026 14:26:02 +1100 Subject: [PATCH 2/5] fix: correctly set up priority queue in work applier (#362) --- cmd/memberagent/main.go | 55 +- .../v1beta1/member_suite_test.go | 4 +- pkg/controllers/workapplier/controller.go | 253 +++--- pkg/controllers/workapplier/pq.go | 294 +++++++ pkg/controllers/workapplier/pq_test.go | 721 ++++++++++++++++++ pkg/controllers/workapplier/suite_test.go | 63 +- 6 files changed, 1165 insertions(+), 225 deletions(-) create mode 100644 pkg/controllers/workapplier/pq.go create mode 100644 pkg/controllers/workapplier/pq_test.go diff --git a/cmd/memberagent/main.go b/cmd/memberagent/main.go index 5ef220d99..226df4a1c 100644 --- a/cmd/memberagent/main.go +++ b/cmd/memberagent/main.go @@ -77,21 +77,19 @@ var ( "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.") leaderElectionNamespace = flag.String("leader-election-namespace", "kube-system", "The namespace in which the leader election resource will be created.") // TODO(weiweng): only keep enableV1Alpha1APIs for backward compatibility with helm charts. Remove soon. - enableV1Alpha1APIs = flag.Bool("enable-v1alpha1-apis", false, "If set, the agents will watch for the v1alpha1 APIs. This is deprecated and will be removed soon.") - enableV1Beta1APIs = flag.Bool("enable-v1beta1-apis", true, "If set, the agents will watch for the v1beta1 APIs.") - propertyProvider = flag.String("property-provider", "none", "The property provider to use for the agent.") - region = flag.String("region", "", "The region where the member cluster resides.") - cloudConfigFile = flag.String("cloud-config", "/etc/kubernetes/provider/config.json", "The path to the cloud cloudconfig file.") - watchWorkWithPriorityQueue = flag.Bool("enable-watch-work-with-priority-queue", false, "If set, the apply_work controller will watch/reconcile work objects that are created new or have recent updates") - watchWorkReconcileAgeMinutes = flag.Int("watch-work-reconcile-age", 60, "maximum age (in minutes) of work objects for apply_work controller to watch/reconcile") - deletionWaitTime = flag.Int("deletion-wait-time", 5, "The time the work-applier will wait for work object to be deleted before updating the applied work owner reference") - enablePprof = flag.Bool("enable-pprof", false, "enable pprof profiling") - pprofPort = flag.Int("pprof-port", 6065, "port for pprof profiling") - hubPprofPort = flag.Int("hub-pprof-port", 6066, "port for hub pprof profiling") - hubQPS = flag.Float64("hub-api-qps", 50, "QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") - hubBurst = flag.Int("hub-api-burst", 500, "Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") - memberQPS = flag.Float64("member-api-qps", 250, "QPS to use while talking with fleet-apiserver. 
Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") - memberBurst = flag.Int("member-api-burst", 1000, "Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") + enableV1Alpha1APIs = flag.Bool("enable-v1alpha1-apis", false, "If set, the agents will watch for the v1alpha1 APIs. This is deprecated and will be removed soon.") + enableV1Beta1APIs = flag.Bool("enable-v1beta1-apis", true, "If set, the agents will watch for the v1beta1 APIs.") + propertyProvider = flag.String("property-provider", "none", "The property provider to use for the agent.") + region = flag.String("region", "", "The region where the member cluster resides.") + cloudConfigFile = flag.String("cloud-config", "/etc/kubernetes/provider/config.json", "The path to the cloud cloudconfig file.") + deletionWaitTime = flag.Int("deletion-wait-time", 5, "The time the work-applier will wait for work object to be deleted before updating the applied work owner reference") + enablePprof = flag.Bool("enable-pprof", false, "enable pprof profiling") + pprofPort = flag.Int("pprof-port", 6065, "port for pprof profiling") + hubPprofPort = flag.Int("hub-pprof-port", 6066, "port for hub pprof profiling") + hubQPS = flag.Float64("hub-api-qps", 50, "QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") + hubBurst = flag.Int("hub-api-burst", 500, "Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") + memberQPS = flag.Float64("member-api-qps", 250, "QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") + memberBurst = flag.Int("member-api-burst", 1000, "Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") // Work applier requeue rate limiter settings. workApplierRequeueRateLimiterAttemptsWithFixedDelay = flag.Int("work-applier-requeue-rate-limiter-attempts-with-fixed-delay", 1, "If set, the work applier will requeue work objects with a fixed delay for the specified number of attempts before switching to exponential backoff.") @@ -102,6 +100,12 @@ var ( workApplierRequeueRateLimiterExponentialBaseForFastBackoff = flag.Float64("work-applier-requeue-rate-limiter-exponential-base-for-fast-backoff", 1.5, "If set, the work applier will start to back off fast at this factor after it completes the slow backoff stage, until it reaches the fast backoff delay cap. Its value should be larger than the base value for the slow backoff stage.") workApplierRequeueRateLimiterMaxFastBackoffDelaySeconds = flag.Float64("work-applier-requeue-rate-limiter-max-fast-backoff-delay-seconds", 900, "If set, the work applier will not back off longer than this value in seconds when it is in the fast backoff stage.") workApplierRequeueRateLimiterSkipToFastBackoffForAvailableOrDiffReportedWorkObjs = flag.Bool("work-applier-requeue-rate-limiter-skip-to-fast-backoff-for-available-or-diff-reported-work-objs", true, "If set, the rate limiter will skip the slow backoff stage and start fast backoff immediately for work objects that are available or have diff reported.") + + // Work applier priority queue settings. 
+ enableWorkApplierPriorityQueue = flag.Bool("enable-work-applier-priority-queue", false, "If set, the work applier will use a priority queue to process work objects.") + workApplierPriorityLinearEquationCoeffA = flag.Int("work-applier-priority-linear-equation-coeff-a", -3, "The work applier sets the priority for a Work object processing attempt using the linear equation: priority = A * (work object age in minutes) + B. This flag sets the coefficient A in the equation.") + workApplierPriorityLinearEquationCoeffB = flag.Int("work-applier-priority-linear-equation-coeff-b", 100, "The work applier sets the priority for a Work object processing attempt using the linear equation: priority = A * (work object age in minutes) + B. This flag sets the coefficient B in the equation.") + // Azure property provider feature gates. isAzProviderCostPropertiesEnabled = flag.Bool("use-cost-properties-in-azure-provider", true, "If set, the Azure property provider will expose cost properties in the member cluster.") isAzProviderAvailableResPropertiesEnabled = flag.Bool("use-available-res-properties-in-azure-provider", true, "If set, the Azure property provider will expose available resources properties in the member cluster.") @@ -133,6 +137,13 @@ func main() { klog.ErrorS(errors.New("either enable-v1alpha1-apis or enable-v1beta1-apis is required"), "Invalid APIs flags") klog.FlushAndExit(klog.ExitFlushTimeout, 1) } + // TO-DO (chenyu1): refactor the validation logic. + if workApplierPriorityLinearEquationCoeffA == nil || *workApplierPriorityLinearEquationCoeffA >= 0 { + klog.ErrorS(errors.New("parameter workApplierPriorityLinearEquationCoeffA is set incorrectly; must use a value less than 0"), "InvalidFlag", "workApplierPriorityLinearEquationCoeffA") + } + if workApplierPriorityLinearEquationCoeffB == nil || *workApplierPriorityLinearEquationCoeffB <= 0 { + klog.ErrorS(errors.New("parameter workApplierPriorityLinearEquationCoeffB is set incorrectly; must use a value greater than 0"), "InvalidFlag", "workApplierPriorityLinearEquationCoeffB") + } hubURL := os.Getenv("HUB_SERVER_URL") @@ -373,7 +384,7 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb klog.ErrorS(err, "unable to find the required CRD", "GVK", gvk) return err } - // create the work controller, so we can pass it to the internal member cluster reconciler + // Set up the work applier. Note that it is referenced by the InternalMemberCluster controller. // Set up the requeue rate limiter for the work applier. // @@ -413,7 +424,8 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb *workApplierRequeueRateLimiterSkipToFastBackoffForAvailableOrDiffReportedWorkObjs, ) - workController := workapplier.NewReconciler( + workApplier := workapplier.NewReconciler( + "work-applier", hubMgr.GetClient(), targetNS, spokeDynamicClient, @@ -426,12 +438,13 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb // Use the default worker count (4) for parallelized manifest processing. 
parallelizer.NewParallelizer(parallelizer.DefaultNumOfWorkers), time.Minute*time.Duration(*deletionWaitTime), - *watchWorkWithPriorityQueue, - *watchWorkReconcileAgeMinutes, requeueRateLimiter, + *enableWorkApplierPriorityQueue, + workApplierPriorityLinearEquationCoeffA, + workApplierPriorityLinearEquationCoeffB, ) - if err = workController.SetupWithManager(hubMgr); err != nil { + if err = workApplier.SetupWithManager(hubMgr); err != nil { klog.ErrorS(err, "Failed to create v1beta1 controller", "controller", "work") return err } @@ -459,7 +472,7 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb ctx, hubMgr.GetClient(), memberMgr.GetConfig(), memberMgr.GetClient(), - workController, + workApplier, pp) if err != nil { klog.ErrorS(err, "Failed to create InternalMemberCluster v1beta1 reconciler") diff --git a/pkg/controllers/internalmembercluster/v1beta1/member_suite_test.go b/pkg/controllers/internalmembercluster/v1beta1/member_suite_test.go index 6176e4926..c8707e0ab 100644 --- a/pkg/controllers/internalmembercluster/v1beta1/member_suite_test.go +++ b/pkg/controllers/internalmembercluster/v1beta1/member_suite_test.go @@ -379,7 +379,7 @@ var _ = BeforeSuite(func() { // This controller is created for testing purposes only; no reconciliation loop is actually // run. - workApplier1 = workapplier.NewReconciler(hubClient, member1ReservedNSName, nil, nil, nil, nil, 0, nil, time.Minute, false, 60, nil) + workApplier1 = workapplier.NewReconciler("work-applier-1", hubClient, member1ReservedNSName, nil, nil, nil, nil, 0, nil, time.Minute, nil, false, nil, nil) propertyProvider1 = &manuallyUpdatedProvider{} member1Reconciler, err := NewReconciler(ctx, hubClient, member1Cfg, member1Client, workApplier1, propertyProvider1) @@ -402,7 +402,7 @@ var _ = BeforeSuite(func() { // This controller is created for testing purposes only; no reconciliation loop is actually // run. 
- workApplier2 = workapplier.NewReconciler(hubClient, member2ReservedNSName, nil, nil, nil, nil, 0, nil, time.Minute, false, 60, nil) + workApplier2 = workapplier.NewReconciler("work-applier-2", hubClient, member2ReservedNSName, nil, nil, nil, nil, 0, nil, time.Minute, nil, false, nil, nil) member2Reconciler, err := NewReconciler(ctx, hubClient, member2Cfg, member2Client, workApplier2, nil) Expect(err).NotTo(HaveOccurred()) diff --git a/pkg/controllers/workapplier/controller.go b/pkg/controllers/workapplier/controller.go index a52fba102..259b0ac9a 100644 --- a/pkg/controllers/workapplier/controller.go +++ b/pkg/controllers/workapplier/controller.go @@ -19,10 +19,10 @@ package workapplier import ( "context" "fmt" + "sync" "time" "go.uber.org/atomic" - "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -41,12 +41,10 @@ import ( ctrloption "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue" - "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" fleetv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" - "github.com/kubefleet-dev/kubefleet/pkg/utils/condition" "github.com/kubefleet-dev/kubefleet/pkg/utils/controller" "github.com/kubefleet-dev/kubefleet/pkg/utils/defaulter" parallelizerutil "github.com/kubefleet-dev/kubefleet/pkg/utils/parallelizer" @@ -60,130 +58,11 @@ const ( workFieldManagerName = "work-api-agent" ) -var ( - workAgeToReconcile = 1 * time.Hour -) - -// Custom type to hold a reconcile.Request and a priority value -type priorityQueueItem struct { - reconcile.Request - Priority int -} - -// PriorityQueueEventHandler is a custom event handler for adding objects to the priority queue. 
-type PriorityQueueEventHandler struct { - Queue priorityqueue.PriorityQueue[priorityQueueItem] // The priority queue to manage events - Client client.Client // store the client to make API calls -} - -// Implement priorityqueue.Item interface for priorityQueueItem -func (i priorityQueueItem) GetPriority() int { - return i.Priority -} - -func (h *PriorityQueueEventHandler) WorkPendingApply(ctx context.Context, obj client.Object) bool { - var work fleetv1beta1.Work - ns := obj.GetNamespace() - name := obj.GetName() - err := h.Client.Get(ctx, client.ObjectKey{ - Namespace: ns, - Name: name, - }, &work) - if err != nil { - // Log and return - klog.ErrorS(err, "Failed to get the work", "name", name, "ns", ns) - return true - } - availCond := meta.FindStatusCondition(work.Status.Conditions, fleetv1beta1.WorkConditionTypeAvailable) - appliedCond := meta.FindStatusCondition(work.Status.Conditions, fleetv1beta1.WorkConditionTypeApplied) - - if availCond != nil && appliedCond != nil { - // check if the object has been recently modified - availCondLastUpdatedTime := availCond.LastTransitionTime.Time - appliedCondLastUpdatedTime := appliedCond.LastTransitionTime.Time - if time.Since(availCondLastUpdatedTime) < workAgeToReconcile || time.Since(appliedCondLastUpdatedTime) < workAgeToReconcile { - return true - } - } - - if condition.IsConditionStatusTrue(availCond, work.GetGeneration()) && - condition.IsConditionStatusTrue(appliedCond, work.GetGeneration()) { - return false - } - - // Work not yet applied - return true -} - -func (h *PriorityQueueEventHandler) AddToPriorityQueue(ctx context.Context, obj client.Object, alwaysAdd bool) { - req := reconcile.Request{ - NamespacedName: types.NamespacedName{ - Namespace: obj.GetNamespace(), - Name: obj.GetName(), - }, - } - - objAge := time.Since(obj.GetCreationTimestamp().Time) - - var objPriority int - if alwaysAdd || objAge < workAgeToReconcile || h.WorkPendingApply(ctx, obj) { - // Newer or pending objects get higher priority - // Negate the Unix timestamp to give higher priority to newer timestamps - objPriority = -int(time.Now().Unix()) - } else { - // skip adding older objects with no changes - klog.V(2).InfoS("adding old item to priorityQueueItem", "obj", req.Name, "age", objAge) - objPriority = int(obj.GetCreationTimestamp().Unix()) - } - - // Create the custom priorityQueueItem with the request and priority - item := priorityQueueItem{ - Request: req, - Priority: objPriority, - } - - h.Queue.Add(item) - klog.V(2).InfoS("Created PriorityQueueItem", "priority", objPriority, "obj", req.Name, "queue size", h.Queue.Len()) -} - -func (h *PriorityQueueEventHandler) Create(ctx context.Context, evt event.TypedCreateEvent[client.Object], queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { - h.AddToPriorityQueue(ctx, evt.Object, false) -} - -func (h *PriorityQueueEventHandler) Delete(ctx context.Context, evt event.TypedDeleteEvent[client.Object], queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { - h.AddToPriorityQueue(ctx, evt.Object, true) -} - -func (h *PriorityQueueEventHandler) Update(ctx context.Context, evt event.TypedUpdateEvent[client.Object], queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { - // Ignore updates where only the status changed - oldObj := evt.ObjectOld.DeepCopyObject() - newObj := evt.ObjectNew.DeepCopyObject() - - // Zero out the status - if oldWork, ok := oldObj.(*fleetv1beta1.Work); ok { - oldWork.Status = fleetv1beta1.WorkStatus{} - } - if newWork, ok := newObj.(*fleetv1beta1.Work); ok { - 
newWork.Status = fleetv1beta1.WorkStatus{} - } - - if !equality.Semantic.DeepEqual(oldObj, newObj) { - // ignore status changes to prevent noise - h.AddToPriorityQueue(ctx, evt.ObjectNew, true) - return - } - klog.V(4).InfoS("ignoring update event with only status change", "work", evt.ObjectNew.GetName()) -} - -func (h *PriorityQueueEventHandler) Generic(ctx context.Context, evt event.TypedGenericEvent[client.Object], queue workqueue.TypedRateLimitingInterface[reconcile.Request]) { - h.AddToPriorityQueue(ctx, evt.Object, false) -} - var defaultRequeueRateLimiter *RequeueMultiStageWithExponentialBackoffRateLimiter = NewRequeueMultiStageWithExponentialBackoffRateLimiter( // Allow 1 attempt of fixed delay; this helps give objects a bit of headroom to get available (or have // diffs reported). 1, - // Use a fixed delay of 5 seconds for the first two attempts. + // Use a fixed delay of 5 seconds for the first attempt. // // Important (chenyu1): before the introduction of the requeue rate limiter, the work // applier uses static requeue intervals, specifically 5 seconds (if the work object is unavailable), @@ -216,32 +95,45 @@ var defaultRequeueRateLimiter *RequeueMultiStageWithExponentialBackoffRateLimite // Reconciler reconciles a Work object. type Reconciler struct { - hubClient client.Client - workNameSpace string - spokeDynamicClient dynamic.Interface - spokeClient client.Client - restMapper meta.RESTMapper - recorder record.EventRecorder - concurrentReconciles int - watchWorkWithPriorityQueue bool - watchWorkReconcileAgeMinutes int - deletionWaitTime time.Duration - joined *atomic.Bool - parallelizer parallelizerutil.Parallelizer - requeueRateLimiter *RequeueMultiStageWithExponentialBackoffRateLimiter + // A controller name might be needed as when the priority queue is in use, the work applier + // will watch the Work objects with the Watch() method rather than the For() method, and in this + // case the controller runtime requires a controller name to be explicitly specified. + controllerName string + hubClient client.Client + workNameSpace string + spokeDynamicClient dynamic.Interface + spokeClient client.Client + restMapper meta.RESTMapper + recorder record.EventRecorder + concurrentReconciles int + deletionWaitTime time.Duration + joined *atomic.Bool + parallelizer parallelizerutil.Parallelizer + requeueRateLimiter *RequeueMultiStageWithExponentialBackoffRateLimiter + usePriorityQueue bool + // The custom priority queue in use if the option watchWorkWithPriorityQueue is enabled. + // + // Note that this variable is set only after the controller starts. + pq priorityqueue.PriorityQueue[reconcile.Request] + pqEventHandler *priorityBasedWorkObjEventHandler + priLinearEqCoeffA int + priLinearEqCoeffB int + pqSetupOnce sync.Once } // NewReconciler returns a new Work object reconciler for the work applier. 
func NewReconciler( + controllerName string, hubClient client.Client, workNameSpace string, spokeDynamicClient dynamic.Interface, spokeClient client.Client, restMapper meta.RESTMapper, recorder record.EventRecorder, concurrentReconciles int, parallelizer parallelizerutil.Parallelizer, deletionWaitTime time.Duration, - watchWorkWithPriorityQueue bool, - watchWorkReconcileAgeMinutes int, requeueRateLimiter *RequeueMultiStageWithExponentialBackoffRateLimiter, + usePriorityQueue bool, + priorityLinearEquationCoeffA *int, + priorityLinearEquationCoeffB *int, ) *Reconciler { if requeueRateLimiter == nil { klog.V(2).InfoS("requeue rate limiter is not set; using the default rate limiter") @@ -251,24 +143,40 @@ func NewReconciler( klog.V(2).InfoS("parallelizer is not set; using the default parallelizer with a worker count of 1") parallelizer = parallelizerutil.NewParallelizer(1) } + if priorityLinearEquationCoeffA == nil || priorityLinearEquationCoeffB == nil { + // Use the default settings if either co-efficient is not set for correctness reasons. + klog.V(2).InfoS("priority linear equation coefficients are not set; using the default settings") + priorityLinearEquationCoeffA = ptr.To(-3) + priorityLinearEquationCoeffB = ptr.To(int(highestPriorityLevel)) + } return &Reconciler{ - hubClient: hubClient, - spokeDynamicClient: spokeDynamicClient, - spokeClient: spokeClient, - restMapper: restMapper, - recorder: recorder, - concurrentReconciles: concurrentReconciles, - parallelizer: parallelizer, - watchWorkWithPriorityQueue: watchWorkWithPriorityQueue, - watchWorkReconcileAgeMinutes: watchWorkReconcileAgeMinutes, - workNameSpace: workNameSpace, - joined: atomic.NewBool(false), - deletionWaitTime: deletionWaitTime, - requeueRateLimiter: requeueRateLimiter, + controllerName: controllerName, + hubClient: hubClient, + spokeDynamicClient: spokeDynamicClient, + spokeClient: spokeClient, + restMapper: restMapper, + recorder: recorder, + concurrentReconciles: concurrentReconciles, + parallelizer: parallelizer, + workNameSpace: workNameSpace, + joined: atomic.NewBool(false), + deletionWaitTime: deletionWaitTime, + requeueRateLimiter: requeueRateLimiter, + usePriorityQueue: usePriorityQueue, + priLinearEqCoeffA: *priorityLinearEquationCoeffA, + priLinearEqCoeffB: *priorityLinearEquationCoeffB, } } +// PriorityQueue returns the priority queue (if any) in use by the reconciler. +// +// Note that the priority queue is only set after the reconciler starts (i.e., the work applier +// has been set up with the controller manager). +func (r *Reconciler) PriorityQueue() priorityqueue.PriorityQueue[reconcile.Request] { + return r.pq +} + type ManifestProcessingApplyOrReportDiffResultType string const ( @@ -510,6 +418,17 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu // status as a trigger for resetting the rate limiter. requeueDelay := r.requeueRateLimiter.When(work, bundles) klog.V(2).InfoS("Requeue the Work object for re-processing", "work", workRef, "delaySeconds", requeueDelay.Seconds()) + if r.usePriorityQueue { + // A priority queue is in use; requeue the Work object with custom logic. + // + // This is needed as the default controller runtime requeueing behavior will always attempt to + // requeue a request with its original priority; with work applier's periodic requeueing mechanism, + // this might lead to a situation where a request always gets processed with high priority, even + // though such preference no longer applies. 
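The Requeue helper invoked below is defined in pq.go later in this patch; instead of returning RequeueAfter, it re-enqueues the request directly on the priority queue at the low priority level while still honoring the delay computed by the rate limiter. A compressed sketch of the controller-runtime priority-queue semantics it relies on (illustrative namespace and object names only; the calls mirror the ones used elsewhere in this patch):

    pq := priorityqueue.New[reconcile.Request]("sketch")
    req := reconcile.Request{
        NamespacedName: types.NamespacedName{Namespace: "member-reserved-ns", Name: "work-1"},
    }

    // Requeue with a delay: the item only becomes available after ~30s and is then
    // dequeued at priority -1, i.e. after any pending high/default-priority items.
    pq.AddWithOpts(priorityqueue.AddOpts{Priority: ptr.To(lowPriorityLevel), After: 30 * time.Second}, req)

    // Requeue without an explicit delay: defer to the queue's rate limiter instead.
    pq.AddWithOpts(priorityqueue.AddOpts{Priority: ptr.To(lowPriorityLevel), RateLimited: true}, req)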
+ r.pqEventHandler.Requeue(work, requeueDelay) + return ctrl.Result{}, nil + } + // No priority queue is in use; requeue the Work object with the default controller runtime logic. return ctrl.Result{RequeueAfter: requeueDelay}, nil } @@ -731,27 +650,35 @@ func (r *Reconciler) Leave(ctx context.Context) error { // SetupWithManager wires up the controller. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { - // Create the priority queue using the rate limiter and a queue name - queue := priorityqueue.New[priorityQueueItem]("apply-work-queue") + if r.usePriorityQueue { + eventHandler := &priorityBasedWorkObjEventHandler{ + qm: r, + priLinearEqCoeffA: r.priLinearEqCoeffA, + priLinearEqCoeffB: r.priLinearEqCoeffB, + } + r.pqEventHandler = eventHandler - // Create the event handler that uses the priority queue - eventHandler := &PriorityQueueEventHandler{ - Queue: queue, // Attach the priority queue to the event handler - Client: r.hubClient, - } + newPQ := func(controllerName string, rateLimiter workqueue.TypedRateLimiter[reconcile.Request]) workqueue.TypedRateLimitingInterface[reconcile.Request] { + withRateLimiterOpt := func(opts *priorityqueue.Opts[reconcile.Request]) { + opts.RateLimiter = rateLimiter + } + r.pqSetupOnce.Do(func() { + r.pq = priorityqueue.New(controllerName, withRateLimiterOpt) + }) + return r.pq + } - if r.watchWorkWithPriorityQueue { - workAgeToReconcile = time.Duration(r.watchWorkReconcileAgeMinutes) * time.Minute - return ctrl.NewControllerManagedBy(mgr).Named("work-applier-controller"). + return ctrl.NewControllerManagedBy(mgr).Named(r.controllerName). WithOptions(ctrloption.Options{ MaxConcurrentReconciles: r.concurrentReconciles, + NewQueue: newPQ, }). - For(&fleetv1beta1.Work{}). + // Use custom event handler to allow access to the priority queue interface. Watches(&fleetv1beta1.Work{}, eventHandler). Complete(r) } - return ctrl.NewControllerManagedBy(mgr).Named("work-applier-controller"). + return ctrl.NewControllerManagedBy(mgr).Named(r.controllerName). WithOptions(ctrloption.Options{ MaxConcurrentReconciles: r.concurrentReconciles, }). diff --git a/pkg/controllers/workapplier/pq.go b/pkg/controllers/workapplier/pq.go new file mode 100644 index 000000000..1a6d01215 --- /dev/null +++ b/pkg/controllers/workapplier/pq.go @@ -0,0 +1,294 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package workapplier + +import ( + "context" + "fmt" + "time" + + "github.com/kubefleet-dev/kubefleet/pkg/utils/condition" + "github.com/kubefleet-dev/kubefleet/pkg/utils/controller" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + fleetv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" +) + +// Note (chenyu1): the work applier is set to periodically requeue Work objects for processing; +// when the KubeFleet agent restarts, the work applier will also have to re-process all the existing +// Work objects for correctness reasons. In environments where a large number of Work objects +// are present, with the default FIFO queue implementation, the work applier might have to spend +// a significant amount of time processing old Work objects that have not been changed for a while +// and are already in a consistent state, before it can get to process recently created/updated Work +// objects, which results in increased latency for new placements to complete. To address this issue, +// we enabled the usage of priority queues in the work applier, so that events +// from newly created/updated Work objects will be processed first. + +const ( + // A list of priority levels for the work applier priority queue. + // + // The work applier, when a priority queue is in use, will prioritize requests in the following + // order: + // * with high priority (>0): all Create/Delete events, and all Update events + // that concern recently created Work objects or Work objects that are in a failed/undetermined + // state (apply op/availability check failure, or diff reporting failure). For specifics, + // see the implementation details below. + // Note that the top priority level is capped at 100 for consistency/cleanness reasons. + // * with default priority (0): all other Update events. + // * with low priority (-1): all requeues, and all Generic events. + // * errors will be requeued in the request's original priority level. + // + // Note that requests with the same priority level will be processed in the FIFO order. + highestPriorityLevel = 100 + defaultPriorityLevel = 0 + lowPriorityLevel = -1 +) + +type CustomPriorityQueueManager interface { + PriorityQueue() priorityqueue.PriorityQueue[reconcile.Request] +} + +var _ handler.TypedEventHandler[client.Object, reconcile.Request] = &priorityBasedWorkObjEventHandler{} + +// priorityBasedWorkObjEventHandler implements the TypedEventHandler interface. +// +// It is used to process work object events in a priority-based manner with a priority queue. +type priorityBasedWorkObjEventHandler struct { + qm CustomPriorityQueueManager + + priLinearEqCoeffA int + priLinearEqCoeffB int +} + +// calcArgBasedPriWithLinearEquation calculates the priority for a work object +// based on its age using a linear equation: Pri(Work) = A * AgeSinceCreationInMinutes(Work) + B, +// where A and B are user-configurable coefficients. +// +// The calculated priority is capped between defaultPriorityLevel and highestPriorityLevel.
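To make the linear mapping concrete, here is a worked example with the default coefficients wired up in cmd/memberagent/main.go (A = -3, B = 100); the helper below is a standalone restatement for illustration, not the method that follows:

    // Clamp A*ageMinutes + B into [defaultPriorityLevel, highestPriorityLevel].
    func priorityFor(a, b, ageMinutes int) int {
        pri := a*ageMinutes + b
        if pri < defaultPriorityLevel {
            return defaultPriorityLevel
        }
        if pri > highestPriorityLevel {
            return highestPriorityLevel
        }
        return pri
    }

    // With A = -3, B = 100:
    //   age  0 min -> 100 (top of the range)
    //   age 10 min ->  70
    //   age 30 min ->  10
    //   age 40 min ->   0 (capped)

In other words, with the defaults the computed priority bottoms out at the default level (0) once a Work object is about 34 minutes old.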
+func (h *priorityBasedWorkObjEventHandler) calcArgBasedPriWithLinearEquation(workObjAgeMinutes int) int { + priority := h.priLinearEqCoeffA*workObjAgeMinutes + h.priLinearEqCoeffB + if priority < defaultPriorityLevel { + return defaultPriorityLevel + } + if priority > highestPriorityLevel { + return highestPriorityLevel + } + return priority +} + +// Create implements the TypedEventHandler interface. +func (h *priorityBasedWorkObjEventHandler) Create(_ context.Context, createEvent event.TypedCreateEvent[client.Object], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + // Do a sanity check. + // + // Normally when this method is called, the priority queue has been initialized. The check is added + // as the implementation has no control over when this method is called. + if h.qm.PriorityQueue() == nil { + wrappedErr := fmt.Errorf("received a Create event, but the priority queue is not initialized") + _ = controller.NewUnexpectedBehaviorError(wrappedErr) + klog.ErrorS(wrappedErr, "Failed to process Create event") + return + } + + // Enqueue the request with the highest priority. + opts := priorityqueue.AddOpts{ + Priority: ptr.To(highestPriorityLevel), + } + workObjName := createEvent.Object.GetName() + workObjNS := createEvent.Object.GetNamespace() + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: workObjNS, + Name: workObjName, + }, + } + h.qm.PriorityQueue().AddWithOpts(opts, req) +} + +// Delete implements the TypedEventHandler interface. +func (h *priorityBasedWorkObjEventHandler) Delete(_ context.Context, deleteEvent event.TypedDeleteEvent[client.Object], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + // Do a sanity check. + if h.qm.PriorityQueue() == nil { + wrappedErr := fmt.Errorf("received a Delete event, but the priority queue is not initialized") + _ = controller.NewUnexpectedBehaviorError(wrappedErr) + klog.ErrorS(wrappedErr, "Failed to process Delete event") + return + } + + // Enqueue the request with the highest priority. + opts := priorityqueue.AddOpts{ + Priority: ptr.To(highestPriorityLevel), + } + workObjName := deleteEvent.Object.GetName() + workObjNS := deleteEvent.Object.GetNamespace() + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: workObjNS, + Name: workObjName, + }, + } + h.qm.PriorityQueue().AddWithOpts(opts, req) +} + +// Update implements the TypedEventHandler interface. +func (h *priorityBasedWorkObjEventHandler) Update(_ context.Context, updateEvent event.TypedUpdateEvent[client.Object], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + // Do a sanity check. + if h.qm.PriorityQueue() == nil { + wrappedErr := fmt.Errorf("received an Update event, but the priority queue is not initialized") + _ = controller.NewUnexpectedBehaviorError(wrappedErr) + klog.ErrorS(wrappedErr, "Failed to process Update event") + return + } + + // Ignore status only updates. + if updateEvent.ObjectOld.GetGeneration() == updateEvent.ObjectNew.GetGeneration() { + return + } + + // No need to check the updated Work object, as both objects have the same creation timestamp, + // and the prioritization logic concerns only the old Work object's status. 
+ oldWorkObj, oldOK := updateEvent.ObjectOld.(*fleetv1beta1.Work) + if !oldOK { + wrappedErr := fmt.Errorf("received an Update event, but the objects cannot be cast to Work objects") + _ = controller.NewUnexpectedBehaviorError(wrappedErr) + klog.ErrorS(wrappedErr, "Failed to process Update event") + return + } + + pri := h.determineUpdateEventPriority(oldWorkObj) + opts := priorityqueue.AddOpts{ + Priority: ptr.To(pri), + } + workObjName := oldWorkObj.GetName() + workObjNS := oldWorkObj.GetNamespace() + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: workObjNS, + Name: workObjName, + }, + } + h.qm.PriorityQueue().AddWithOpts(opts, req) +} + +// Generic implements the TypedEventHandler interface. +func (h *priorityBasedWorkObjEventHandler) Generic(_ context.Context, genericEvent event.TypedGenericEvent[client.Object], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + // Do a sanity check. + if h.qm.PriorityQueue() == nil { + wrappedErr := fmt.Errorf("received a Generic event, but the priority queue is not initialized") + _ = controller.NewUnexpectedBehaviorError(wrappedErr) + klog.ErrorS(wrappedErr, "Failed to process Generic event") + return + } + + // Enqueue the request with low priority. + opts := priorityqueue.AddOpts{ + Priority: ptr.To(lowPriorityLevel), + } + workObjName := genericEvent.Object.GetName() + workObjNS := genericEvent.Object.GetNamespace() + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: workObjNS, + Name: workObjName, + }, + } + h.qm.PriorityQueue().AddWithOpts(opts, req) +} + +func (h *priorityBasedWorkObjEventHandler) determineUpdateEventPriority(oldWorkObj *fleetv1beta1.Work) int { + // If the work object is recently created (its age is within the given threshold), + // process its Update event with high priority. + + // The age is expected to be the same for both old and new work objects, as the field + // is immutable and not user configurable. + workObjAgeMinutes := int(time.Since(oldWorkObj.CreationTimestamp.Time).Minutes()) + + // Check if the work object is in a failed/undetermined state. + // + // * If the work object is in such a state, process its Update event with highest priority (even if the work object + // was created long ago); + // * Otherwise, prioritize the processing of the Update event if the work object is recently created; + // * Use the default priority level for all other cases. + oldApplyStrategy := oldWorkObj.Spec.ApplyStrategy + isReportDiffModeEnabled := oldApplyStrategy != nil && oldApplyStrategy.Type == fleetv1beta1.ApplyStrategyTypeReportDiff + + appliedCond := meta.FindStatusCondition(oldWorkObj.Status.Conditions, fleetv1beta1.WorkConditionTypeApplied) + availableCond := meta.FindStatusCondition(oldWorkObj.Status.Conditions, fleetv1beta1.WorkConditionTypeAvailable) + diffReportedCond := meta.FindStatusCondition(oldWorkObj.Status.Conditions, fleetv1beta1.WorkConditionTypeDiffReported) + + // Note (chenyu1): it might be true that the Update event involves an apply strategy change; however, the prioritization + // logic stays the same: if the old work object is in a failed/undetermined state, the apply strategy change + // should receive the highest priority; otherwise, the Update event should be processed with medium priority. + switch { + case isReportDiffModeEnabled && condition.IsConditionStatusTrue(diffReportedCond, oldWorkObj.Generation): + // The ReportDiff mode is enabled and the status suggests that the diff reporting has been completed successfully. 
+ // Determine the priority level based on the age of the Work object. + return h.calcArgBasedPriWithLinearEquation(workObjAgeMinutes) + case isReportDiffModeEnabled: + // The ReportDiff mode is enabled, but the diff reporting has not been completed yet or has failed. + // Use high priority for the Update event. + return highestPriorityLevel + case condition.IsConditionStatusTrue(appliedCond, oldWorkObj.Generation) && condition.IsConditionStatusTrue(availableCond, oldWorkObj.Generation): + // The apply strategy is set to the CSA/SSA mode and the work object is applied and available. + // Determine the priority level based on the age of the Work object. + return h.calcArgBasedPriWithLinearEquation(workObjAgeMinutes) + default: + // The apply strategy is set to the CSA/SSA mode and the work object is in a failed/undetermined state. + // Use high priority for the Update event. + return highestPriorityLevel + } +} + +// Requeue requeues the given work object for processing after the given delay. +func (h *priorityBasedWorkObjEventHandler) Requeue(work *fleetv1beta1.Work, delay time.Duration) { + // Do a sanity check. + if h.qm.PriorityQueue() == nil { + wrappedErr := fmt.Errorf("received a requeue request, but the priority queue is not initialized") + _ = controller.NewUnexpectedBehaviorError(wrappedErr) + klog.ErrorS(wrappedErr, "Failed to process Requeue request") + return + } + + opts := priorityqueue.AddOpts{ + // All requeued requests have the low priority level. + Priority: ptr.To(int(lowPriorityLevel)), + } + // Set the RateLimited flag in consistency with the controller runtime requeueing behavior. + if delay == 0 { + opts.RateLimited = true + } else { + opts.After = delay + } + + workObjName := work.GetName() + workObjNS := work.GetNamespace() + req := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: workObjNS, + Name: workObjName, + }, + } + h.qm.PriorityQueue().AddWithOpts(opts, req) +} diff --git a/pkg/controllers/workapplier/pq_test.go b/pkg/controllers/workapplier/pq_test.go new file mode 100644 index 000000000..b33f715b9 --- /dev/null +++ b/pkg/controllers/workapplier/pq_test.go @@ -0,0 +1,721 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package workapplier + +import ( + "context" + "fmt" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/google/go-cmp/cmp" + fleetv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" +) + +const ( + pqName = "test-pq" + workNameForPriorityTestingTmpl = "prioritized-work-%s" +) + +type pqWrapper struct { + pq priorityqueue.PriorityQueue[reconcile.Request] +} + +// PriorityQueue implements the CustomPriorityQueueManager interface. 
+func (p *pqWrapper) PriorityQueue() priorityqueue.PriorityQueue[reconcile.Request] { + return p.pq +} + +// Verify that pqWrapper implements the CustomPriorityQueueManager interface. +var _ CustomPriorityQueueManager = &pqWrapper{} + +// ExpectedDequeuedKeyAndPriority is used in tests to represent an expected dequeued key along with its priority. +type ExpectedDequeuedKeyAndPriority struct { + Key reconcile.Request + Priority int +} + +// TestCreateEventHandler tests the Create event handler of the priority-based Work object event handler. +func TestCreateEventHandler(t *testing.T) { + ctx := context.Background() + pq := priorityqueue.New[reconcile.Request](pqName) + pqEventHandler := &priorityBasedWorkObjEventHandler{ + qm: &pqWrapper{pq: pq}, + // For simplicity reasons, set all Update events for completed Work objects to have the priority level of 80 + // regardless of their ages. + priLinearEqCoeffA: 0, + priLinearEqCoeffB: 80, + } + + // Add two keys with default and low priority levels respectively. + opts := priorityqueue.AddOpts{ + Priority: ptr.To(defaultPriorityLevel), + } + workWithDefaultPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "default") + key := types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithDefaultPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + opts = priorityqueue.AddOpts{ + Priority: ptr.To(lowPriorityLevel), + } + workWithLowPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "low") + key = types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithLowPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + // Handle a CreateEvent, which should add a new key with high priority. + workJustCreatedName := fmt.Sprintf(workNameForPriorityTestingTmpl, "just-created") + workObj := fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workJustCreatedName, + }, + } + pqEventHandler.Create(ctx, event.TypedCreateEvent[client.Object]{Object: &workObj}, nil) + + // Check the queue length. + if !cmp.Equal(pq.Len(), 3) { + t.Fatalf("priority queue length, expected 3, got %d", pq.Len()) + } + + // Check if the first item dequeued is the one added by the CreateEvent handler (high priority). + item, pri, _ := pq.GetWithPriority() + wantItem := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workJustCreatedName, + }, + } + if diff := cmp.Diff(item, wantItem); diff != "" { + t.Errorf("dequeued item mismatch (-got, +want):\n%s", diff) + } + if !cmp.Equal(pri, highestPriorityLevel) { + t.Errorf("priority of dequeued item, expected %d, got %d", highestPriorityLevel, pri) + } +} + +// TestUpdateEventHandler_NormalOps tests the Update event handler of the priority-based Work object event handler +// under normal operations. +func TestUpdateEventHandler_NormalOps(t *testing.T) { + ctx := context.Background() + pq := priorityqueue.New[reconcile.Request](pqName) + pqEventHandler := &priorityBasedWorkObjEventHandler{ + qm: &pqWrapper{pq: pq}, + priLinearEqCoeffA: -10, + priLinearEqCoeffB: 80, + } + + // Add a key with low priority level. 
+ opts := priorityqueue.AddOpts{ + Priority: ptr.To(lowPriorityLevel), + } + workWithLowPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "low") + key := types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithLowPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + // Handle an UpdateEvent that concerns a Work object with ReportDiff strategy and has been + // processed successfully long ago (30 minutes). + workInReportDiffModeAndProcessedLongBfrName := fmt.Sprintf(workNameForPriorityTestingTmpl, "report-diff-processed-long-bfr") + longAgo := time.Now().Add(-time.Minute * 30) + oldWorkObj := &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workInReportDiffModeAndProcessedLongBfrName, + CreationTimestamp: metav1.Time{Time: longAgo}, + }, + Spec: fleetv1beta1.WorkSpec{ + ApplyStrategy: &fleetv1beta1.ApplyStrategy{ + Type: fleetv1beta1.ApplyStrategyTypeReportDiff, + }, + }, + Status: fleetv1beta1.WorkStatus{ + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeDiffReported, + Status: metav1.ConditionTrue, + }, + }, + }, + } + newWorkObj := oldWorkObj.DeepCopy() + // Simulate an update. + newWorkObj.Generation += 1 + pqEventHandler.Update(ctx, event.TypedUpdateEvent[client.Object]{ObjectOld: oldWorkObj, ObjectNew: newWorkObj}, nil) + + // Handle an UpdateEvent that concerns a normal Work object that was created very recently (2 minutes ago). + workInCSAModeAndJustProcessedName := fmt.Sprintf(workNameForPriorityTestingTmpl, "csa-just-processed") + shortWhileAgo := time.Now().Add(-time.Minute * 2) + oldWorkObj = &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workInCSAModeAndJustProcessedName, + CreationTimestamp: metav1.Time{Time: shortWhileAgo}, + }, + Spec: fleetv1beta1.WorkSpec{}, + Status: fleetv1beta1.WorkStatus{ + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + }, + }, + }, + } + newWorkObj = oldWorkObj.DeepCopy() + // Simulate an update. + newWorkObj.Generation += 1 + pqEventHandler.Update(ctx, event.TypedUpdateEvent[client.Object]{ObjectOld: oldWorkObj, ObjectNew: newWorkObj}, nil) + + // Check the queue length. + if !cmp.Equal(pq.Len(), 3) { + t.Fatalf("priority queue length, expected 3, got %d", pq.Len()) + } + + // Dequeue all items and check if the keys and their assigned priorities are as expected. 
+ wantDequeuedItemsWithPriorities := []ExpectedDequeuedKeyAndPriority{ + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workInCSAModeAndJustProcessedName, + }, + }, + Priority: 60, // -10 * 2 + 80 = 60 + }, + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workInReportDiffModeAndProcessedLongBfrName, + }, + }, + Priority: defaultPriorityLevel, // -10 * 30 + 80 = -220 -> capped to defaultPriorityLevel (0) + }, + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workWithLowPriName, + }, + }, + Priority: lowPriorityLevel, + }, + } + + for i := 0; i < 3; i++ { + item, pri, _ := pq.GetWithPriority() + wantItemWithPri := wantDequeuedItemsWithPriorities[i] + if diff := cmp.Diff(item, wantItemWithPri.Key); diff != "" { + t.Errorf("dequeued item #%d mismatch (-got, +want):\n%s", i, diff) + } + if !cmp.Equal(pri, wantItemWithPri.Priority) { + t.Errorf("priority of dequeued item #%d, expected %d, got %d", i, wantItemWithPri.Priority, pri) + } + } +} + +// TestUpdateEventHandler_Erred tests the Update event handler of the priority-based Work object event handler +// when it encounters errors. +func TestUpdateEventHandler_Erred(t *testing.T) { + ctx := context.Background() + + workObj := &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: fmt.Sprintf(workNameForPriorityTestingTmpl, "erred"), + }, + } + statusOnlyUpdateEvent := event.TypedUpdateEvent[client.Object]{ + ObjectOld: workObj, + ObjectNew: workObj.DeepCopy(), + } + + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: nsName, + Generation: 1, + }, + } + invalidUpdateEvent := event.TypedUpdateEvent[client.Object]{ + ObjectOld: nsObj, + ObjectNew: workObj, + } + + testCases := []struct { + name string + updateEvent event.TypedUpdateEvent[client.Object] + }{ + { + name: "status only update", + updateEvent: statusOnlyUpdateEvent, + }, + { + // Normally this should never occur. + name: "invalid update event with the old object not being a Work", + updateEvent: invalidUpdateEvent, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pq := priorityqueue.New[reconcile.Request](pqName) + pqEventHandler := &priorityBasedWorkObjEventHandler{ + qm: &pqWrapper{pq: pq}, + } + pqEventHandler.Update(ctx, tc.updateEvent, nil) + + // Check the queue length. + if !cmp.Equal(pq.Len(), 0) { + t.Fatalf("priority queue length, expected 0, got %d", pq.Len()) + } + }) + } +} + +// TestDeleteEventHandler tests the Delete event handler of the priority-based Work object event handler. +func TestDeleteEventHandler(t *testing.T) { + ctx := context.Background() + pq := priorityqueue.New[reconcile.Request](pqName) + pqEventHandler := &priorityBasedWorkObjEventHandler{ + qm: &pqWrapper{pq: pq}, + // For simplicity reasons, set all Update events for completed Work objects to have the priority level of 80 + // regardless of their ages. + priLinearEqCoeffA: 0, + priLinearEqCoeffB: 80, + } + + // Add two keys with default and low priority levels respectively. 
+ opts := priorityqueue.AddOpts{ + Priority: ptr.To(defaultPriorityLevel), + } + workWithDefaultPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "default") + key := types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithDefaultPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + opts = priorityqueue.AddOpts{ + Priority: ptr.To(lowPriorityLevel), + } + workWithLowPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "low") + key = types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithLowPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + // Handle a DeleteEvent, which should add a new key with the highest priority. + workJustDeletedName := fmt.Sprintf(workNameForPriorityTestingTmpl, "just-deleted") + workObj := fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workJustDeletedName, + }, + } + pqEventHandler.Delete(ctx, event.TypedDeleteEvent[client.Object]{Object: &workObj}, nil) + + // Check the queue length. + if !cmp.Equal(pq.Len(), 3) { + t.Fatalf("priority queue length, expected 3, got %d", pq.Len()) + } + + // Check if the first item dequeued is the one added by the DeleteEvent handler (high priority). + item, pri, _ := pq.GetWithPriority() + wantItem := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workJustDeletedName, + }, + } + if diff := cmp.Diff(item, wantItem); diff != "" { + t.Errorf("dequeued item mismatch (-got, +want):\n%s", diff) + } + if !cmp.Equal(pri, highestPriorityLevel) { + t.Errorf("priority of dequeued item, expected %d, got %d", highestPriorityLevel, pri) + } +} + +// TestGenericEventHandler tests the Generic event handler of the priority-based Work object event handler. +func TestGenericEventHandler(t *testing.T) { + ctx := context.Background() + pq := priorityqueue.New[reconcile.Request](pqName) + pqEventHandler := &priorityBasedWorkObjEventHandler{ + qm: &pqWrapper{pq: pq}, + // For simplicity reasons, set all Update events for completed Work objects to have the priority level of 80 + // regardless of their ages. + priLinearEqCoeffA: 0, + priLinearEqCoeffB: 80, + } + + // Add two keys with highest and default priority levels respectively. + opts := priorityqueue.AddOpts{ + Priority: ptr.To(highestPriorityLevel), + } + workWithHighestPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "highest") + key := types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithHighestPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + opts = priorityqueue.AddOpts{ + Priority: ptr.To(defaultPriorityLevel), + } + workWithDefaultPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "default") + key = types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithDefaultPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + // Handle a GenericEvent, which should add a new key with low priority. + workGenericEventName := fmt.Sprintf(workNameForPriorityTestingTmpl, "generic") + workObj := fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workGenericEventName, + }, + } + pqEventHandler.Generic(ctx, event.TypedGenericEvent[client.Object]{Object: &workObj}, nil) + + // Check the queue length. + if !cmp.Equal(pq.Len(), 3) { + t.Fatalf("priority queue length, expected 3, got %d", pq.Len()) + } + + // Dequeue all items and check if the keys and their assigned priorities are as expected. 
+ wantDequeuedItemsWithPriorities := []ExpectedDequeuedKeyAndPriority{ + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workWithHighestPriName, + }, + }, + Priority: highestPriorityLevel, + }, + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workWithDefaultPriName, + }, + }, + Priority: defaultPriorityLevel, + }, + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workGenericEventName, + }, + }, + Priority: lowPriorityLevel, + }, + } + + for i := 0; i < 3; i++ { + item, pri, _ := pq.GetWithPriority() + wantItemWithPri := wantDequeuedItemsWithPriorities[i] + if diff := cmp.Diff(item, wantItemWithPri.Key); diff != "" { + t.Errorf("dequeued item #%d mismatch (-got, +want):\n%s", i, diff) + } + if !cmp.Equal(pri, wantItemWithPri.Priority) { + t.Errorf("priority of dequeued item #%d, expected %d, got %d", i, wantItemWithPri.Priority, pri) + } + } +} + +func TestDetermineUpdateEventPriority(t *testing.T) { + now := metav1.Now() + longAgo := metav1.NewTime(now.Add(-time.Minute * 30)) + + pq := priorityqueue.New[reconcile.Request](pqName) + pqEventHandler := &priorityBasedWorkObjEventHandler{ + qm: &pqWrapper{pq: pq}, + priLinearEqCoeffA: -10, + priLinearEqCoeffB: 80, + } + + testCases := []struct { + name string + oldWorkObj *fleetv1beta1.Work + wantPriority int + }{ + { + name: "reportDiff mode, diff reported", + oldWorkObj: &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workName, + CreationTimestamp: longAgo, + }, + Spec: fleetv1beta1.WorkSpec{ + ApplyStrategy: &fleetv1beta1.ApplyStrategy{ + Type: fleetv1beta1.ApplyStrategyTypeReportDiff, + }, + }, + Status: fleetv1beta1.WorkStatus{ + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeDiffReported, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + wantPriority: defaultPriorityLevel, + }, + { + name: "reportDiff mode, diff not reported", + oldWorkObj: &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workName, + CreationTimestamp: longAgo, + }, + Spec: fleetv1beta1.WorkSpec{ + ApplyStrategy: &fleetv1beta1.ApplyStrategy{ + Type: fleetv1beta1.ApplyStrategyTypeReportDiff, + }, + }, + Status: fleetv1beta1.WorkStatus{ + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeDiffReported, + Status: metav1.ConditionFalse, + }, + }, + }, + }, + wantPriority: highestPriorityLevel, + }, + { + name: "CSA/SSA mode, applied and available", + oldWorkObj: &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workName, + CreationTimestamp: longAgo, + }, + Spec: fleetv1beta1.WorkSpec{}, + Status: fleetv1beta1.WorkStatus{ + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + wantPriority: defaultPriorityLevel, + }, + { + name: "CSA/SSA mode, not applied and available", + oldWorkObj: &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: workName, + CreationTimestamp: longAgo, + }, + Spec: fleetv1beta1.WorkSpec{}, + Status: fleetv1beta1.WorkStatus{ + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionFalse, + }, + }, + }, + 
}, + wantPriority: highestPriorityLevel, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pri := pqEventHandler.determineUpdateEventPriority(tc.oldWorkObj) + if !cmp.Equal(pri, tc.wantPriority) { + t.Errorf("determined priority, expected %d, got %d", tc.wantPriority, pri) + } + }) + } +} + +// TestCalcArgBasedPriWithLinearEquation tests the calcArgBasedPriWithLinearEquation method. +func TestCalculateArgBasedPriWithLinearEquation(t *testing.T) { + pqEventHandler := &priorityBasedWorkObjEventHandler{ + priLinearEqCoeffA: -10, + priLinearEqCoeffB: highestPriorityLevel + 20, // 120 + } + + testCases := []struct { + name string + workObjAgeMinutes int + wantPri int + }{ + { + name: "just created (capped)", + workObjAgeMinutes: 0, + wantPri: highestPriorityLevel, + }, + { + name: "5 minutes old", + workObjAgeMinutes: 5, + wantPri: 70, + }, + { + name: "8 minutes old", + workObjAgeMinutes: 8, + wantPri: 40, + }, + { + name: "15 minutes old (capped)", + workObjAgeMinutes: 15, + wantPri: defaultPriorityLevel, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pri := pqEventHandler.calcArgBasedPriWithLinearEquation(tc.workObjAgeMinutes) + if !cmp.Equal(pri, tc.wantPri) { + t.Errorf("calculated priority, expected %d, got %d", tc.wantPri, pri) + } + }) + } +} + +// TestRequeue tests the Requeue method of the priority-based Work object event handler. +func TestRequeue(t *testing.T) { + pq := priorityqueue.New[reconcile.Request](pqName) + pqEventHandler := &priorityBasedWorkObjEventHandler{ + qm: &pqWrapper{pq: pq}, + // For simplicity reasons, set all Update events for completed Work objects to have the priority level of 80 + // regardless of their ages. + priLinearEqCoeffA: 0, + priLinearEqCoeffB: 80, + } + + // Add two keys with highest and default priority levels respectively. + opts := priorityqueue.AddOpts{ + Priority: ptr.To(highestPriorityLevel), + } + workWithHighestPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "highest") + key := types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithHighestPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + opts = priorityqueue.AddOpts{ + Priority: ptr.To(defaultPriorityLevel), + } + workWithDefaultPriName := fmt.Sprintf(workNameForPriorityTestingTmpl, "default") + key = types.NamespacedName{Namespace: memberReservedNSName1, Name: workWithDefaultPriName} + pq.AddWithOpts(opts, reconcile.Request{NamespacedName: key}) + + // Requeue a request immediately, which should add a new key with low priority. + requeuedWork := fmt.Sprintf(workNameForPriorityTestingTmpl, "requeued") + workObj := fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: requeuedWork, + }, + } + pqEventHandler.Requeue(&workObj, 0) + + // Requeue a request with some delay, which should also add a new key with low priority. + requeuedWorkWithDelay := fmt.Sprintf(workNameForPriorityTestingTmpl, "requeued-with-delay") + workObjWithDelay := fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: memberReservedNSName1, + Name: requeuedWorkWithDelay, + }, + } + pqEventHandler.Requeue(&workObjWithDelay, time.Second*5) + + // Wait for a very short while as the default rate limiter has a base delay. + time.Sleep(time.Millisecond * 100) + + // Check the queue length. + if !cmp.Equal(pq.Len(), 3) { + t.Fatalf("priority queue length, expected 3, got %d", pq.Len()) + } + + // Wait for the delay to pass (add 1 second as buffer). 
+ time.Sleep(time.Second * 6) + + // Check the queue length again. + if !cmp.Equal(pq.Len(), 4) { + t.Fatalf("priority queue length after delay, expected 4, got %d", pq.Len()) + } + + // Dequeue all items and check if the keys and their assigned priorities are as expected. + wantDequeuedItemsWithPriorities := []ExpectedDequeuedKeyAndPriority{ + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workWithHighestPriName, + }, + }, + Priority: highestPriorityLevel, + }, + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: workWithDefaultPriName, + }, + }, + Priority: defaultPriorityLevel, + }, + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: requeuedWork, + }, + }, + Priority: lowPriorityLevel, + }, + { + Key: reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: memberReservedNSName1, + Name: requeuedWorkWithDelay, + }, + }, + Priority: lowPriorityLevel, + }, + } + + for i := 0; i < 4; i++ { + item, pri, _ := pq.GetWithPriority() + wantItemWithPri := wantDequeuedItemsWithPriorities[i] + if diff := cmp.Diff(item, wantItemWithPri.Key); diff != "" { + t.Errorf("dequeued item #%d mismatch (-got, +want):\n%s", i, diff) + } + if !cmp.Equal(pri, wantItemWithPri.Priority) { + t.Errorf("priority of dequeued item #%d, expected %d, got %d", i, wantItemWithPri.Priority, pri) + } + } +} diff --git a/pkg/controllers/workapplier/suite_test.go b/pkg/controllers/workapplier/suite_test.go index 01787af13..df8582344 100644 --- a/pkg/controllers/workapplier/suite_test.go +++ b/pkg/controllers/workapplier/suite_test.go @@ -37,15 +37,12 @@ import ( "k8s.io/klog/v2" "k8s.io/klog/v2/textlogger" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" - ctrloption "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/predicate" fleetv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" "github.com/kubefleet-dev/kubefleet/pkg/utils/parallelizer" @@ -300,6 +297,7 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) workApplier1 = NewReconciler( + "work-applier", hubClient, memberReservedNSName1, memberDynamicClient1, @@ -309,9 +307,10 @@ var _ = BeforeSuite(func() { maxConcurrentReconciles, parallelizer.NewParallelizer(workerCount), 30*time.Second, - true, - 60, - nil, // Use the default backoff rate limiter. + nil, // Use the default backoff rate limiter. + false, // Disable priority queueing. + nil, // Use the default priority linear equation coefficients. + nil, // Use the default priority linear equation coefficients. 
) Expect(workApplier1.SetupWithManager(hubMgr1)).To(Succeed()) @@ -349,27 +348,22 @@ var _ = BeforeSuite(func() { true, ) workApplier2 = NewReconciler( + "work-applier-long-backoff", hubClient, memberReservedNSName2, memberDynamicClient2, memberClient2, memberClient2.RESTMapper(), - hubMgr2.GetEventRecorderFor("work-applier"), + hubMgr2.GetEventRecorderFor("work-applier-long-backoff"), maxConcurrentReconciles, parallelizer.NewParallelizer(workerCount), 30*time.Second, - true, - 60, superLongExponentialBackoffRateLimiter, + false, // Disable priority queueing. + nil, // Use the default priority linear equation coefficients. + nil, // Use the default priority linear equation coefficients. ) - // Due to name conflicts, the second work applier must be set up manually. - err = ctrl.NewControllerManagedBy(hubMgr2).Named("work-applier-controller-exponential-backoff"). - WithOptions(ctrloption.Options{ - MaxConcurrentReconciles: workApplier2.concurrentReconciles, - }). - For(&fleetv1beta1.Work{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). - Complete(workApplier2) - Expect(err).NotTo(HaveOccurred()) + Expect(workApplier2.SetupWithManager(hubMgr2)).To(Succeed()) By("Setting up the controller and the controller manager for member cluster 3") hubMgr3, err = ctrl.NewManager(hubCfg, ctrl.Options{ @@ -393,6 +387,7 @@ var _ = BeforeSuite(func() { delay: parallelizerFixedDelay, } workApplier3 = NewReconciler( + "work-applier-waved-parallel-processing", hubClient, memberReservedNSName3, memberDynamicClient3, @@ -402,18 +397,12 @@ var _ = BeforeSuite(func() { maxConcurrentReconciles, pWithDelay, 30*time.Second, - true, - 60, - nil, // Use the default backoff rate limiter. + nil, // Use the default backoff rate limiter. + false, // Disable priority queueing. + nil, // Use the default priority linear equation coefficients. + nil, // Use the default priority linear equation coefficients. ) - // Due to name conflicts, the third work applier must be set up manually. - err = ctrl.NewControllerManagedBy(hubMgr3).Named("work-applier-controller-waved-parallel-processing"). - WithOptions(ctrloption.Options{ - MaxConcurrentReconciles: workApplier3.concurrentReconciles, - }). - For(&fleetv1beta1.Work{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). - Complete(workApplier3) - Expect(err).NotTo(HaveOccurred()) + Expect(workApplier3.SetupWithManager(hubMgr3)).To(Succeed()) By("Setting up the controller and the controller manager for member cluster 4") hubMgr4, err = ctrl.NewManager(hubCfg, ctrl.Options{ @@ -435,27 +424,23 @@ var _ = BeforeSuite(func() { wrappedMemberClient4 := NewClientWrapperWithStatusUpdateCounter(memberClient4) memberClient4Wrapper = wrappedMemberClient4.(*clientWrapperWithStatusUpdateCounter) workApplier4 = NewReconciler( + "work-applier-wrapped-client", wrappedHubClient, memberReservedNSName4, memberDynamicClient4, wrappedMemberClient4, memberClient4.RESTMapper(), - hubMgr4.GetEventRecorderFor("work-applier"), + hubMgr4.GetEventRecorderFor("work-applier-wrapped-client"), maxConcurrentReconciles, parallelizer.NewParallelizer(workerCount), 30*time.Second, - true, - 60, - nil, // Use the default backoff rate limiter. + nil, // Use the default backoff rate limiter. + false, // Disable priority queueing. + nil, // Use the default priority linear equation coefficients. + nil, // Use the default priority linear equation coefficients. ) // Due to name conflicts, the third work applier must be set up manually. 
- err = ctrl.NewControllerManagedBy(hubMgr4).Named("work-applier-controller-skipping-status-update"). - WithOptions(ctrloption.Options{ - MaxConcurrentReconciles: workApplier4.concurrentReconciles, - }). - For(&fleetv1beta1.Work{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). - Complete(workApplier4) - Expect(err).NotTo(HaveOccurred()) + Expect(workApplier4.SetupWithManager(hubMgr4)).To(Succeed()) wg = sync.WaitGroup{} wg.Add(4) From 22b50347d91ccf8c241b55b082b9fee519330367 Mon Sep 17 00:00:00 2001 From: Arvind Thirumurugan Date: Wed, 14 Jan 2026 00:01:17 -0800 Subject: [PATCH 3/5] fix: error messages used for updateRun (#408) --- pkg/controllers/updaterun/controller.go | 11 +++--- pkg/controllers/updaterun/execution.go | 25 +++++++++--- pkg/controllers/updaterun/execution_test.go | 44 +++++++++++++++++++++ pkg/controllers/updaterun/initialization.go | 42 ++++++++++---------- 4 files changed, 91 insertions(+), 31 deletions(-) diff --git a/pkg/controllers/updaterun/controller.go b/pkg/controllers/updaterun/controller.go index b61bfee01..c65883a4c 100644 --- a/pkg/controllers/updaterun/controller.go +++ b/pkg/controllers/updaterun/controller.go @@ -51,9 +51,10 @@ import ( var ( // errStagedUpdatedAborted is the error when the updateRun is aborted. errStagedUpdatedAborted = fmt.Errorf("cannot continue the updateRun") - // errInitializedFailed is the error when the updateRun fails to initialize. - // It is a wrapped error of errStagedUpdatedAborted, because some initialization functions are reused in the validation step. - errInitializedFailed = fmt.Errorf("%w: failed to initialize the updateRun", errStagedUpdatedAborted) + // errValidationFailed is the error when the updateRun fails validation. + // It is a wrapped error of errStagedUpdatedAborted, because we perform validation during initialization + // and subsequent reconciliations where initialization is skipped. + errValidationFailed = fmt.Errorf("%w: failed to validate the updateRun", errStagedUpdatedAborted) ) // Reconciler reconciles an updateRun object. @@ -125,8 +126,8 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim var initErr error if toBeUpdatedBindings, toBeDeletedBindings, initErr = r.initialize(ctx, updateRun); initErr != nil { klog.ErrorS(initErr, "Failed to initialize the updateRun", "updateRun", runObjRef) - // errInitializedFailed cannot be retried. - if errors.Is(initErr, errInitializedFailed) { + // errStagedUpdatedAborted cannot be retried. + if errors.Is(initErr, errStagedUpdatedAborted) { return runtime.Result{}, r.recordInitializationFailed(ctx, updateRun, initErr.Error()) } return runtime.Result{}, initErr diff --git a/pkg/controllers/updaterun/execution.go b/pkg/controllers/updaterun/execution.go index 4f9f88a68..bdb37ac62 100644 --- a/pkg/controllers/updaterun/execution.go +++ b/pkg/controllers/updaterun/execution.go @@ -234,10 +234,10 @@ func (r *Reconciler) executeUpdatingStage( rolloutStarted := condition.IsConditionStatusTrue(meta.FindStatusCondition(binding.GetBindingStatus().Conditions, string(placementv1beta1.ResourceBindingRolloutStarted)), binding.GetGeneration()) bindingSpec := binding.GetBindingSpec() if !inSync || !rolloutStarted || bindingSpec.State != placementv1beta1.BindingStateBound { - // This issue mostly happens when there are concurrent updateRuns referencing the same clusterResourcePlacement but releasing different versions. 
+ // This issue mostly happens when there are concurrent updateRuns referencing the same placement but releasing different versions. // After the 1st updateRun updates the binding, and before the controller re-checks the binding status, the 2nd updateRun updates the same binding, and thus the 1st updateRun is preempted and observes the binding not matching the desired state. preemptedErr := controller.NewUserError(fmt.Errorf("the binding of the updating cluster `%s` in the stage `%s` is not up-to-date with the desired status, "+ - "please check the status of binding `%s` and see if there is a concurrent updateRun referencing the same clusterResourcePlacement and updating the same cluster", + "please check the status of binding `%s` and see if there is a concurrent updateRun referencing the same placement and updating the same cluster", clusterStatus.ClusterName, updatingStageStatus.StageName, klog.KObj(binding))) klog.ErrorS(preemptedErr, "The binding has been changed during updating", "bindingSpecInSync", inSync, "bindingState", bindingSpec.State, @@ -553,7 +553,7 @@ func calculateMaxConcurrencyValue(status *placementv1beta1.UpdateRunStatus, stag // It marks the update run as stuck if any clusters are stuck, or as progressing if some clusters have finished updating. func aggregateUpdateRunStatus(updateRun placementv1beta1.UpdateRunObj, stageName string, stuckClusterNames []string) { if len(stuckClusterNames) > 0 { - markUpdateRunStuck(updateRun, stageName, strings.Join(stuckClusterNames, ", ")) + markUpdateRunStuck(updateRun, stageName, stuckClusterNames) } else if updateRun.GetUpdateRunSpec().State == placementv1beta1.StateRun { // If there is no stuck cluster but some progress has been made, mark the update run as progressing. markUpdateRunProgressing(updateRun) @@ -675,17 +675,32 @@ func markUpdateRunProgressingIfNotWaitingOrStuck(updateRun placementv1beta1.Upda } // markUpdateRunStuck marks the updateRun as stuck in memory. -func markUpdateRunStuck(updateRun placementv1beta1.UpdateRunObj, stageName, clusterNames string) { +func markUpdateRunStuck(updateRun placementv1beta1.UpdateRunObj, stageName string, stuckClusterNames []string) { updateRunStatus := updateRun.GetUpdateRunStatus() meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{ Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), Status: metav1.ConditionFalse, ObservedGeneration: updateRun.GetGeneration(), Reason: condition.UpdateRunStuckReason, - Message: fmt.Sprintf("The updateRun is stuck waiting for cluster(s) %s in stage %s to finish updating, please check placement status for potential errors", clusterNames, stageName), + Message: fmt.Sprintf("The updateRun is stuck waiting for %s cluster(s) in stage %s to finish updating, please check placement status for potential errors", generateStuckClustersString(stuckClusterNames), stageName), }) } +func generateStuckClustersString(stuckClusterNames []string) string { + if len(stuckClusterNames) == 0 { + return "" + } + if len(stuckClusterNames) <= 3 { + return strings.Join(stuckClusterNames, ", ") + } + // Print first 3 if more than 3 stuck clusters. + clustersToShow := stuckClusterNames + if len(stuckClusterNames) > 3 { + clustersToShow = stuckClusterNames[:3] + } + return strings.Join(clustersToShow, ", ") + "..." +} + // markUpdateRunWaiting marks the updateRun as waiting in memory. 
func markUpdateRunWaiting(updateRun placementv1beta1.UpdateRunObj, message string) { updateRunStatus := updateRun.GetUpdateRunStatus() diff --git a/pkg/controllers/updaterun/execution_test.go b/pkg/controllers/updaterun/execution_test.go index 510720906..f4dd799aa 100644 --- a/pkg/controllers/updaterun/execution_test.go +++ b/pkg/controllers/updaterun/execution_test.go @@ -1205,3 +1205,47 @@ func TestCheckBeforeStageTasksStatus_NegativeCases(t *testing.T) { }) } } + +func TestGenerateStuckClustersString(t *testing.T) { + tests := []struct { + name string + stuckClusterNames []string + wantClusterString string + }{ + { + name: "empty cluster list", + stuckClusterNames: []string{}, + wantClusterString: "", + }, + { + name: "single cluster", + stuckClusterNames: []string{"cluster1"}, + wantClusterString: "cluster1", + }, + { + name: "two clusters", + stuckClusterNames: []string{"cluster1", "cluster2"}, + wantClusterString: "cluster1, cluster2", + }, + { + name: "three clusters", + stuckClusterNames: []string{"cluster1", "cluster2", "cluster3"}, + wantClusterString: "cluster1, cluster2, cluster3", + }, + { + name: "five clusters - should only show first three", + stuckClusterNames: []string{"cluster1", "cluster2", "cluster3", "cluster4", "cluster5"}, + wantClusterString: "cluster1, cluster2, cluster3...", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := generateStuckClustersString(tt.stuckClusterNames) + + if got != tt.wantClusterString { + t.Fatalf("generateStuckClustersString() = %v, want %v", got, tt.wantClusterString) + } + }) + } +} diff --git a/pkg/controllers/updaterun/initialization.go b/pkg/controllers/updaterun/initialization.go index d484aeccb..e45f419bf 100644 --- a/pkg/controllers/updaterun/initialization.go +++ b/pkg/controllers/updaterun/initialization.go @@ -91,7 +91,7 @@ func (r *Reconciler) validatePlacement(ctx context.Context, updateRun placementv if apierrors.IsNotFound(err) { placementNotFoundErr := controller.NewUserError(fmt.Errorf("parent placement not found")) klog.ErrorS(err, "Failed to get placement", "placement", placementKey, "updateRun", updateRunRef) - return types.NamespacedName{}, fmt.Errorf("%w: %s", errInitializedFailed, placementNotFoundErr.Error()) + return types.NamespacedName{}, fmt.Errorf("%w: %s", errValidationFailed, placementNotFoundErr.Error()) } klog.ErrorS(err, "Failed to get placement", "placement", placementKey, "updateRun", updateRunRef) return types.NamespacedName{}, controller.NewAPIServerError(true, err) @@ -106,7 +106,7 @@ func (r *Reconciler) validatePlacement(ctx context.Context, updateRun placementv if placementSpec.Strategy.Type != placementv1beta1.ExternalRolloutStrategyType { klog.V(2).InfoS("The placement does not have an external rollout strategy", "placement", placementKey, "updateRun", updateRunRef) wrongRolloutTypeErr := controller.NewUserError(errors.New("parent placement does not have an external rollout strategy, current strategy: " + string(placementSpec.Strategy.Type))) - return types.NamespacedName{}, fmt.Errorf("%w: %s", errInitializedFailed, wrongRolloutTypeErr.Error()) + return types.NamespacedName{}, fmt.Errorf("%w: %s", errValidationFailed, wrongRolloutTypeErr.Error()) } updateRunStatus := updateRun.GetUpdateRunStatus() @@ -138,13 +138,13 @@ func (r *Reconciler) determinePolicySnapshot( err := controller.NewUnexpectedBehaviorError(fmt.Errorf("more than one (%d in actual) latest policy snapshots associated with the placement: `%s`", len(policySnapshotObjs), placementKey)) 
klog.ErrorS(err, "Failed to find the latest policy snapshot", "placement", placementKey, "updateRun", updateRunRef) // no more retries for this error. - return nil, -1, fmt.Errorf("%w: %s", errInitializedFailed, err.Error()) + return nil, -1, fmt.Errorf("%w: %s", errValidationFailed, err.Error()) } // no latest policy snapshot found. err := fmt.Errorf("no latest policy snapshot associated with the placement: `%s`", placementKey) klog.ErrorS(err, "Failed to find the latest policy snapshot", "placement", placementKey, "updateRun", updateRunRef) // again, no more retries here. - return nil, -1, fmt.Errorf("%w: %s", errInitializedFailed, err.Error()) + return nil, -1, fmt.Errorf("%w: %s", errValidationFailed, err.Error()) } latestPolicySnapshot := policySnapshotObjs[0] @@ -154,7 +154,7 @@ func (r *Reconciler) determinePolicySnapshot( noIndexErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("policy snapshot `%s/%s` does not have a policy index label", latestPolicySnapshot.GetNamespace(), latestPolicySnapshot.GetName())) klog.ErrorS(noIndexErr, "Failed to get the policy index from the latestPolicySnapshot", "placement", placementKey, "policySnapshot", policySnapshotRef, "updateRun", updateRunRef) // no more retries here. - return nil, -1, fmt.Errorf("%w: %s", errInitializedFailed, noIndexErr.Error()) + return nil, -1, fmt.Errorf("%w: %s", errValidationFailed, noIndexErr.Error()) } updateRunStatus := updateRun.GetUpdateRunStatus() @@ -172,7 +172,7 @@ func (r *Reconciler) determinePolicySnapshot( annErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("%w: the policy snapshot `%s/%s` doesn't have valid cluster count annotation", err, latestPolicySnapshot.GetNamespace(), latestPolicySnapshot.GetName())) klog.ErrorS(annErr, "Failed to get the cluster count from the latestPolicySnapshot", "placement", placementKey, "policySnapshot", policySnapshotRef, "updateRun", updateRunRef) // no more retries here. - return nil, -1, fmt.Errorf("%w, %s", errInitializedFailed, annErr.Error()) + return nil, -1, fmt.Errorf("%w, %s", errValidationFailed, annErr.Error()) } clusterCount = count } else if policySnapshotSpec.Policy.PlacementType == placementv1beta1.PickFixedPlacementType { @@ -185,7 +185,7 @@ func (r *Reconciler) determinePolicySnapshot( if !condition.IsConditionStatusTrue(latestPolicySnapshot.GetCondition(string(placementv1beta1.PolicySnapshotScheduled)), latestPolicySnapshot.GetGeneration()) { scheduleErr := fmt.Errorf("policy snapshot `%s` not fully scheduled yet", latestPolicySnapshot.GetName()) klog.ErrorS(scheduleErr, "The policy snapshot is not scheduled successfully", "placement", placementKey, "policySnapshot", policySnapshotRef, "updateRun", updateRunRef) - return nil, -1, fmt.Errorf("%w: %s", errInitializedFailed, scheduleErr.Error()) + return nil, -1, fmt.Errorf("%w: %s", errValidationFailed, scheduleErr.Error()) } return latestPolicySnapshot, clusterCount, nil } @@ -241,7 +241,7 @@ func (r *Reconciler) collectScheduledClusters( nobindingErr := fmt.Errorf("no scheduled or to-be-deleted bindings found for the latest policy snapshot %s/%s", latestPolicySnapshot.GetNamespace(), latestPolicySnapshot.GetName()) klog.ErrorS(nobindingErr, "Failed to collect bindings", "placement", placementKey, "policySnapshot", policySnapshotRef, "updateRun", updateRunRef) // no more retries here. 
- return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, nobindingErr.Error()) + return nil, nil, fmt.Errorf("%w: %s", errValidationFailed, nobindingErr.Error()) } updateRunStatus := updateRun.GetUpdateRunStatus() @@ -253,7 +253,7 @@ func (r *Reconciler) collectScheduledClusters( countErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the number of selected bindings %d is not equal to the observed cluster count %d", len(selectedBindings), updateRunStatus.PolicyObservedClusterCount)) klog.ErrorS(countErr, "Failed to collect bindings", "placement", placementKey, "policySnapshot", policySnapshotRef, "updateRun", updateRunRef) // no more retries here. - return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, countErr.Error()) + return nil, nil, fmt.Errorf("%w: %s", errValidationFailed, countErr.Error()) } return selectedBindings, toBeDeletedBindings, nil } @@ -281,7 +281,7 @@ func (r *Reconciler) generateStagesByStrategy( if apierrors.IsNotFound(err) { // we won't continue or retry the initialization if the UpdateStrategy is not found. strategyNotFoundErr := controller.NewUserError(fmt.Errorf("referenced updateStrategy not found: `%s`", strategyKey)) - return fmt.Errorf("%w: %s", errInitializedFailed, strategyNotFoundErr.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, strategyNotFoundErr.Error()) } // other err can be retried. return controller.NewAPIServerError(true, err) @@ -348,13 +348,13 @@ func (r *Reconciler) computeRunStageStatus( klog.ErrorS(err, "Failed to validate the before stage tasks", "updateStrategy", strategyKey, "stageName", stage.Name, "updateRun", updateRunRef) // no more retries here. invalidBeforeStageErr := controller.NewUserError(fmt.Errorf("the before stage tasks are invalid, updateStrategy: `%s`, stage: %s, err: %s", strategyKey, stage.Name, err.Error())) - return fmt.Errorf("%w: %s", errInitializedFailed, invalidBeforeStageErr.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, invalidBeforeStageErr.Error()) } if err := validateAfterStageTask(stage.AfterStageTasks); err != nil { klog.ErrorS(err, "Failed to validate the after stage tasks", "updateStrategy", strategyKey, "stageName", stage.Name, "updateRun", updateRunRef) // no more retries here. invalidAfterStageErr := controller.NewUserError(fmt.Errorf("the after stage tasks are invalid, updateStrategy: `%s`, stage: %s, err: %s", strategyKey, stage.Name, err.Error())) - return fmt.Errorf("%w: %s", errInitializedFailed, invalidAfterStageErr.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, invalidAfterStageErr.Error()) } curStageUpdatingStatus := placementv1beta1.StageUpdatingStatus{StageName: stage.Name} @@ -364,7 +364,7 @@ func (r *Reconciler) computeRunStageStatus( klog.ErrorS(err, "Failed to convert label selector", "updateStrategy", strategyKey, "stageName", stage.Name, "labelSelector", stage.LabelSelector, "updateRun", updateRunRef) // no more retries here. invalidLabelErr := controller.NewUserError(fmt.Errorf("the stage label selector is invalid, updateStrategy: `%s`, stage: %s, err: %s", strategyKey, stage.Name, err.Error())) - return fmt.Errorf("%w: %s", errInitializedFailed, invalidLabelErr.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, invalidLabelErr.Error()) } // List all the clusters that match the label selector. 
var clusterList clusterv1beta1.MemberClusterList @@ -382,7 +382,7 @@ func (r *Reconciler) computeRunStageStatus( dupErr := controller.NewUserError(fmt.Errorf("cluster `%s` appears in more than one stages", cluster.Name)) klog.ErrorS(dupErr, "Failed to compute the stage", "updateStrategy", strategyKey, "stageName", stage.Name, "updateRun", updateRunRef) // no more retries here. - return fmt.Errorf("%w: %s", errInitializedFailed, dupErr.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, dupErr.Error()) } if stage.SortingLabelKey != nil { // interpret the label values as integers. @@ -390,7 +390,7 @@ func (r *Reconciler) computeRunStageStatus( keyErr := controller.NewUserError(fmt.Errorf("the sorting label `%s:%s` on cluster `%s` is not valid: %s", *stage.SortingLabelKey, cluster.Labels[*stage.SortingLabelKey], cluster.Name, err.Error())) klog.ErrorS(keyErr, "Failed to sort clusters in the stage", "updateStrategy", strategyKey, "stageName", stage.Name, "updateRun", updateRunRef) // no more retries here. - return fmt.Errorf("%w: %s", errInitializedFailed, keyErr.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, keyErr.Error()) } } curStageClusters = append(curStageClusters, cluster) @@ -457,7 +457,7 @@ func (r *Reconciler) computeRunStageStatus( sort.Strings(missingClusters) klog.ErrorS(missingErr, "Clusters are missing in any stage", "clusters", strings.Join(missingClusters, ", "), "updateStrategy", strategyKey, "updateRun", updateRunRef) // no more retries here, only show the first 10 missing clusters in the placement status. - return fmt.Errorf("%w: %s, total %d, showing up to 10: %s", errInitializedFailed, missingErr.Error(), len(missingClusters), strings.Join(missingClusters[:min(10, len(missingClusters))], ", ")) + return fmt.Errorf("%w: %s, total %d, showing up to 10: %s", errValidationFailed, missingErr.Error(), len(missingClusters), strings.Join(missingClusters[:min(10, len(missingClusters))], ", ")) } return nil } @@ -523,7 +523,7 @@ func (r *Reconciler) recordOverrideSnapshots(ctx context.Context, placementKey t err := controller.NewUnexpectedBehaviorError(fmt.Errorf("no master resourceSnapshot found for placement %s", placementKey)) klog.ErrorS(err, "Failed to find master resourceSnapshot", "updateRun", updateRunRef) // no more retries here. - return fmt.Errorf("%w: %s", errInitializedFailed, err.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, err.Error()) } klog.V(2).InfoS("Found master resourceSnapshot", "placement", placementKey, "masterResourceSnapshot", masterResourceSnapshot.GetName(), "updateRun", updateRunRef) @@ -539,7 +539,7 @@ func (r *Reconciler) recordOverrideSnapshots(ctx context.Context, placementKey t if err != nil { klog.ErrorS(err, "Failed to find all matching overrides for the stagedUpdateRun", "resourceSnapshot", resourceSnapshotRef, "updateRun", updateRunRef) // no more retries here. - return fmt.Errorf("%w: %s", errInitializedFailed, err.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, err.Error()) } // Pick the overrides associated with each target cluster. @@ -551,7 +551,7 @@ func (r *Reconciler) recordOverrideSnapshots(ctx context.Context, placementKey t if err != nil { klog.ErrorS(err, "Failed to pick the override snapshots for cluster", "cluster", clusterStatus.ClusterName, "resourceSnapshot", resourceSnapshotRef, "updateRun", updateRunRef) // no more retries here. 
- return fmt.Errorf("%w: %s", errInitializedFailed, err.Error()) + return fmt.Errorf("%w: %s", errValidationFailed, err.Error()) } } } @@ -571,7 +571,7 @@ func (r *Reconciler) getResourceSnapshotObjs(ctx context.Context, placementKey t err := controller.NewUserError(fmt.Errorf("invalid resource snapshot index `%s` provided, expected an integer >= 0", updateRunSpec.ResourceSnapshotIndex)) klog.ErrorS(err, "Failed to parse the resource snapshot index", "updateRun", updateRunRef) // no more retries here. - return nil, fmt.Errorf("%w: %s", errInitializedFailed, err.Error()) + return nil, fmt.Errorf("%w: %s", errValidationFailed, err.Error()) } resourceSnapshotList, err := controller.ListAllResourceSnapshotWithAnIndex(ctx, r.Client, updateRunSpec.ResourceSnapshotIndex, placementKey.Name, placementKey.Namespace) @@ -587,7 +587,7 @@ func (r *Reconciler) getResourceSnapshotObjs(ctx context.Context, placementKey t err := controller.NewUserError(fmt.Errorf("no resourceSnapshots with index `%d` found for placement `%s`", snapshotIndex, placementKey)) klog.ErrorS(err, "No specified resourceSnapshots found", "updateRun", updateRunRef) // no more retries here. - return resourceSnapshotObjs, fmt.Errorf("%w: %s", errInitializedFailed, err.Error()) + return resourceSnapshotObjs, fmt.Errorf("%w: %s", errValidationFailed, err.Error()) } return resourceSnapshotObjs, nil } From 9555d6657b79b90839e81a3f48087ce4417b7e57 Mon Sep 17 00:00:00 2001 From: Arvind Thirumurugan Date: Fri, 16 Jan 2026 11:06:11 -0800 Subject: [PATCH 4/5] fix: make public updaterun test util functions importable (#412) --- test/e2e/actuals_test.go | 191 ------------- test/e2e/cluster_staged_updaterun_test.go | 160 +++++++---- test/e2e/setup_test.go | 7 - test/e2e/staged_updaterun_test.go | 148 ++++++---- test/e2e/utils_test.go | 184 ------------ test/utils/updaterun/updaterun_status.go | 326 ++++++++++++++++++++++ 6 files changed, 526 insertions(+), 490 deletions(-) create mode 100644 test/utils/updaterun/updaterun_status.go diff --git a/test/e2e/actuals_test.go b/test/e2e/actuals_test.go index 15a92f606..8bbc0dcbe 100644 --- a/test/e2e/actuals_test.go +++ b/test/e2e/actuals_test.go @@ -2028,197 +2028,6 @@ func validateRPSnapshotRevisions(rpName, namespace string, wantPolicySnapshotRev return nil } -func updateRunClusterRolloutSucceedConditions(generation int64) []metav1.Condition { - return []metav1.Condition{ - { - Type: string(placementv1beta1.ClusterUpdatingConditionStarted), - Status: metav1.ConditionTrue, - Reason: condition.ClusterUpdatingStartedReason, - ObservedGeneration: generation, - }, - { - Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), - Status: metav1.ConditionTrue, - Reason: condition.ClusterUpdatingSucceededReason, - ObservedGeneration: generation, - }, - } -} - -func updateRunStageRolloutSucceedConditions(generation int64) []metav1.Condition { - return []metav1.Condition{ - { - Type: string(placementv1beta1.StageUpdatingConditionProgressing), - Status: metav1.ConditionFalse, - Reason: condition.StageUpdatingSucceededReason, - ObservedGeneration: generation, - }, - { - Type: string(placementv1beta1.StageUpdatingConditionSucceeded), - Status: metav1.ConditionTrue, - Reason: condition.StageUpdatingSucceededReason, - ObservedGeneration: generation, - }, - } -} - -func updateRunStageTaskSucceedConditions(generation int64, taskType placementv1beta1.StageTaskType) []metav1.Condition { - if taskType == placementv1beta1.StageTaskTypeApproval { - return []metav1.Condition{ - { - Type: 
string(placementv1beta1.StageTaskConditionApprovalRequestCreated), - Status: metav1.ConditionTrue, - Reason: condition.StageTaskApprovalRequestCreatedReason, - ObservedGeneration: generation, - }, - { - Type: string(placementv1beta1.StageTaskConditionApprovalRequestApproved), - Status: metav1.ConditionTrue, - Reason: condition.StageTaskApprovalRequestApprovedReason, - ObservedGeneration: generation, - }, - } - } - return []metav1.Condition{ - { - Type: string(placementv1beta1.StageTaskConditionWaitTimeElapsed), - Status: metav1.ConditionTrue, - Reason: condition.AfterStageTaskWaitTimeElapsedReason, - ObservedGeneration: generation, - }, - } -} - -func updateRunSucceedConditions(generation int64) []metav1.Condition { - return []metav1.Condition{ - { - Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), - Status: metav1.ConditionTrue, - Reason: condition.UpdateRunInitializeSucceededReason, - ObservedGeneration: generation, - }, - { - Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), - Status: metav1.ConditionFalse, - Reason: condition.UpdateRunSucceededReason, - ObservedGeneration: generation, - }, - { - Type: string(placementv1beta1.StagedUpdateRunConditionSucceeded), - Status: metav1.ConditionTrue, - Reason: condition.UpdateRunSucceededReason, - ObservedGeneration: generation, - }, - } -} - -func updateRunInitializedConditions(generation int64) []metav1.Condition { - return []metav1.Condition{ - { - Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), - Status: metav1.ConditionTrue, - Reason: condition.UpdateRunInitializeSucceededReason, - ObservedGeneration: generation, - }, - } -} - -func buildStageUpdatingStatusesForInitialized( - wantStrategySpec *placementv1beta1.UpdateStrategySpec, - wantSelectedClusters [][]string, - wantCROs map[string][]string, - wantROs map[string][]placementv1beta1.NamespacedName, - updateRun placementv1beta1.UpdateRunObj, -) []placementv1beta1.StageUpdatingStatus { - stagesStatus := make([]placementv1beta1.StageUpdatingStatus, len(wantStrategySpec.Stages)) - for i, stage := range wantStrategySpec.Stages { - stagesStatus[i].StageName = stage.Name - stagesStatus[i].Clusters = make([]placementv1beta1.ClusterUpdatingStatus, len(wantSelectedClusters[i])) - for j := range stagesStatus[i].Clusters { - stagesStatus[i].Clusters[j].ClusterName = wantSelectedClusters[i][j] - stagesStatus[i].Clusters[j].ClusterResourceOverrideSnapshots = wantCROs[wantSelectedClusters[i][j]] - stagesStatus[i].Clusters[j].ResourceOverrideSnapshots = wantROs[wantSelectedClusters[i][j]] - } - stagesStatus[i].BeforeStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.BeforeStageTasks)) - for j, task := range stage.BeforeStageTasks { - stagesStatus[i].BeforeStageTaskStatus[j].Type = task.Type - if task.Type == placementv1beta1.StageTaskTypeApproval { - stagesStatus[i].BeforeStageTaskStatus[j].ApprovalRequestName = fmt.Sprintf(placementv1beta1.BeforeStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) - } - } - stagesStatus[i].AfterStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.AfterStageTasks)) - for j, task := range stage.AfterStageTasks { - stagesStatus[i].AfterStageTaskStatus[j].Type = task.Type - if task.Type == placementv1beta1.StageTaskTypeApproval { - stagesStatus[i].AfterStageTaskStatus[j].ApprovalRequestName = fmt.Sprintf(placementv1beta1.AfterStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) - } - } - } - return stagesStatus -} - -func buildStageUpdatingStatuses( - wantStrategySpec 
*placementv1beta1.UpdateStrategySpec, - wantSelectedClusters [][]string, - wantCROs map[string][]string, - wantROs map[string][]placementv1beta1.NamespacedName, - updateRun placementv1beta1.UpdateRunObj, -) []placementv1beta1.StageUpdatingStatus { - stagesStatus := make([]placementv1beta1.StageUpdatingStatus, len(wantStrategySpec.Stages)) - for i, stage := range wantStrategySpec.Stages { - stagesStatus[i].StageName = stage.Name - stagesStatus[i].Clusters = make([]placementv1beta1.ClusterUpdatingStatus, len(wantSelectedClusters[i])) - for j := range stagesStatus[i].Clusters { - stagesStatus[i].Clusters[j].ClusterName = wantSelectedClusters[i][j] - stagesStatus[i].Clusters[j].ClusterResourceOverrideSnapshots = wantCROs[wantSelectedClusters[i][j]] - stagesStatus[i].Clusters[j].ResourceOverrideSnapshots = wantROs[wantSelectedClusters[i][j]] - stagesStatus[i].Clusters[j].Conditions = updateRunClusterRolloutSucceedConditions(updateRun.GetGeneration()) - } - stagesStatus[i].BeforeStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.BeforeStageTasks)) - for j, task := range stage.BeforeStageTasks { - stagesStatus[i].BeforeStageTaskStatus[j].Type = task.Type - if task.Type == placementv1beta1.StageTaskTypeApproval { - stagesStatus[i].BeforeStageTaskStatus[j].ApprovalRequestName = fmt.Sprintf(placementv1beta1.BeforeStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) - } - stagesStatus[i].BeforeStageTaskStatus[j].Conditions = updateRunStageTaskSucceedConditions(updateRun.GetGeneration(), task.Type) - } - stagesStatus[i].AfterStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.AfterStageTasks)) - for j, task := range stage.AfterStageTasks { - stagesStatus[i].AfterStageTaskStatus[j].Type = task.Type - if task.Type == placementv1beta1.StageTaskTypeApproval { - stagesStatus[i].AfterStageTaskStatus[j].ApprovalRequestName = fmt.Sprintf(placementv1beta1.AfterStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) - } - stagesStatus[i].AfterStageTaskStatus[j].Conditions = updateRunStageTaskSucceedConditions(updateRun.GetGeneration(), task.Type) - } - stagesStatus[i].Conditions = updateRunStageRolloutSucceedConditions(updateRun.GetGeneration()) - } - return stagesStatus -} - -func buildDeletionStageStatus( - wantUnscheduledClusters []string, - updateRun placementv1beta1.UpdateRunObj, -) *placementv1beta1.StageUpdatingStatus { - deleteStageStatus := buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) - deleteStageStatus.Conditions = updateRunStageRolloutSucceedConditions(updateRun.GetGeneration()) - return deleteStageStatus -} - -func buildDeletionStatusWithoutConditions( - wantUnscheduledClusters []string, - updateRun placementv1beta1.UpdateRunObj, -) *placementv1beta1.StageUpdatingStatus { - deleteStageStatus := &placementv1beta1.StageUpdatingStatus{ - StageName: "kubernetes-fleet.io/deleteStage", - } - deleteStageStatus.Clusters = make([]placementv1beta1.ClusterUpdatingStatus, len(wantUnscheduledClusters)) - for i := range deleteStageStatus.Clusters { - deleteStageStatus.Clusters[i].ClusterName = wantUnscheduledClusters[i] - deleteStageStatus.Clusters[i].Conditions = updateRunClusterRolloutSucceedConditions(updateRun.GetGeneration()) - } - return deleteStageStatus -} - func clusterStagedUpdateRunAndClusterApprovalRequestsRemovedActual(updateRunName string) func() error { return func() error { if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, &placementv1beta1.ClusterStagedUpdateRun{}); !errors.IsNotFound(err) { diff --git 
a/test/e2e/cluster_staged_updaterun_test.go b/test/e2e/cluster_staged_updaterun_test.go index 5a025c23a..25ac8e486 100644 --- a/test/e2e/cluster_staged_updaterun_test.go +++ b/test/e2e/cluster_staged_updaterun_test.go @@ -27,6 +27,7 @@ import ( apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -34,6 +35,7 @@ import ( placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" "github.com/kubefleet-dev/kubefleet/pkg/utils/condition" "github.com/kubefleet-dev/kubefleet/test/e2e/framework" + testutilsupdaterun "github.com/kubefleet-dev/kubefleet/test/utils/updaterun" ) const ( @@ -144,7 +146,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -152,9 +154,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to all the members after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -216,7 +218,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{resourceSnapshotIndex1st, resourceSnapshotIndex2nd, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - 
ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -228,9 +230,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to all the members after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) By("Verify that new the configmap is updated on all member clusters") for idx := range allMemberClusters { @@ -324,7 +326,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -332,9 +334,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to all the members after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, 
&strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -395,7 +397,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{resourceSnapshotIndex1st, resourceSnapshotIndex2nd, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -407,9 +409,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) By("Verify that new the configmap is updated on all member clusters") for idx := range allMemberClusters { @@ -443,7 +445,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{resourceSnapshotIndex2nd, resourceSnapshotIndex1st, resourceSnapshotIndex2nd}, []bool{true, true, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, 
placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollback resources to prod stage until approved", func() { @@ -455,9 +457,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollback resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) for idx := range allMemberClusters { configMapActual := configMapPlacedOnClusterActual(allMemberClusters[idx], &oldConfigMap) @@ -549,7 +551,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[:2], []string{"", resourceSnapshotIndex1st}, []bool{false, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -557,9 +559,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 after approval but not member-cluster-3 and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil, 
true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -605,7 +607,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -613,9 +615,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -657,14 +659,14 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, false, []string{allMemberClusterNames[2]}, []string{resourceSnapshotIndex1st}, []bool{false}, nil, nil) Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], 
envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should remove resources on member-cluster-1 and member-cluster-2 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) // need to go through two stages - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) Eventually(csurSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[2]) checkIfRemovedWorkResourcesFromMemberClusters([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -752,7 +754,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[2:], []string{""}, []bool{false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -760,9 +762,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, 
hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[2]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) @@ -807,7 +809,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{false, true, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 until approved", func() { @@ -815,9 +817,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -859,7 +861,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames[1:], []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{true, true}, nil, nil) Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + 
validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not remove resources from member-cluster-1 until approved", func() { @@ -867,9 +869,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should remove resources on member-cluster-1 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) Eventually(csurSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[2]) checkIfRemovedWorkResourcesFromMemberClusters([]*framework.Cluster{allMemberClusters[0]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[1], allMemberClusters[2]}) @@ -1037,7 +1039,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, wantROs) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 and member-cluster-3 until approved", func() { @@ -1045,9 +1047,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, wantCROs, wantROs, true) + csurSucceededActual := 
testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, wantCROs, wantROs, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1143,13 +1145,13 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should report diff for member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) }) @@ -1258,13 +1260,13 @@ var _ = Describe("test CRP rollout with staged update run", func() { Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update to the new configmap %s on cluster %s", newConfigMap.Name, allMemberClusterNames[1]) // Approval for AfterStageTasks of canary stage - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) // Approval for BeforeStageTasks of prod stage - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, 
placementv1beta1.BeforeStageTaskLabelValue) // Verify complete rollout - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) // Verify new configmap is on all member clusters @@ -1353,7 +1355,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should complete the staged update run after approval", func() { - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1442,7 +1444,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { It("Should complete the cluster staged update run with all 3 clusters updated in parallel", func() { // With maxConcurrency=3, all 3 clusters should be updated in parallel. // Each round waits 15 seconds, so total time should be under 20s. - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunParallelEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1533,7 +1535,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { // Since maxConcurrency=70% each round we process 2 clusters in parallel, // so all 3 clusters should be updated in 2 rounds. // Each round waits 15 seconds, so total time should be under 40s. 
- csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunParallelEventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1616,14 +1618,14 @@ var _ = Describe("test CRP rollout with staged update run", func() { checkIfRemovedWorkResourcesFromAllMemberClustersConsistently() By("Validating the csur status remains in Initialize state") - csurNotStartedActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) + csurNotStartedActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) Consistently(csurNotStartedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to Initialize updateRun %s", updateRunNames[0]) }) It("Should rollout resources to member-cluster-2 only after update run is in Run state", func() { // Update the update run state to Run By("Updating the update run state to Run") - UpdateClusterStagedUpdateRunState(ctx, hubClient, updateRunNames[0], placementv1beta1.StateRun) + updateClusterStagedUpdateRunState(updateRunNames[0], placementv1beta1.StateRun) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1632,7 +1634,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout to all member clusters while waiting for beforeStageTask approval for prod stage", func() { @@ -1647,7 +1649,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { It("Should not rollout 
to all member clusters after stopping update run", func() { By("Updating update run state to Stop") - UpdateClusterStagedUpdateRunState(ctx, hubClient, updateRunNames[0], placementv1beta1.StateStop) + updateClusterStagedUpdateRunState(updateRunNames[0], placementv1beta1.StateStop) By("Validating not rolled out to member-cluster-1 and member-cluster-3 yet") checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1659,7 +1661,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should not complete rollout to all member after beforeStageTask approval while in Stop state", func() { - ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) By("Validating not rolled out to member-cluster-1 and member-cluster-3 after beforeStageTask approval while update run is in Stop state") checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1668,13 +1670,13 @@ var _ = Describe("test CRP rollout with staged update run", func() { It("Should complete rollout to all member clusters after resuming the update run to Run state", func() { // Update the update run state back to Run. By("Updating the update run state back to Run") - UpdateClusterStagedUpdateRunState(ctx, hubClient, updateRunNames[0], placementv1beta1.StateRun) + updateClusterStagedUpdateRunState(updateRunNames[0], placementv1beta1.StateRun) By("All member clusters should have work resources placed") checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1], allMemberClusters[2]}) By("Validating update run has succeeded after resuming") - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1755,7 +1757,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label createClusterStagedUpdateRunSucceed(updateRunNames[0], crpName, resourceSnapshotIndex1st, strategyName, placementv1beta1.StateRun) By("Validating staged update run has succeeded") - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual 
:= testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) By("Validating CRP status as completed") @@ -1807,7 +1809,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the second staged update run and complete the CRP", func() { - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1], allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1], allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames[1:], @@ -1855,7 +1857,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the staged update run, complete CRP, and rollout resources to all member clusters", func() { - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, @@ -1898,7 +1900,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the staged update run, complete CRP, and rollout updated resources to all member clusters", func() { - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, 3, defaultApplyStrategy, 
&strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex2nd, true, allMemberClusterNames, @@ -1937,7 +1939,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the staged update run, complete CRP, and re-place resources to all member clusters", func() { - csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := testutilsupdaterun.ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, @@ -2075,6 +2077,50 @@ func createClusterStagedUpdateRunSucceedWithNoResourceSnapshotIndex(updateRunNam Expect(hubClient.Create(ctx, updateRun)).To(Succeed(), "Failed to create ClusterStagedUpdateRun %s", updateRunName) } +func updateClusterStagedUpdateRunState(updateRunName string, state placementv1beta1.State) { + Eventually(func() error { + updateRun := &placementv1beta1.ClusterStagedUpdateRun{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { + return fmt.Errorf("failed to get ClusterStagedUpdateRun %s", updateRunName) + } + + updateRun.Spec.State = state + if err := hubClient.Update(ctx, updateRun); err != nil { + return fmt.Errorf("failed to update ClusterStagedUpdateRun %s", updateRunName) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update ClusterStagedUpdateRun %s state to %s", updateRunName, state) +} + +func validateAndApproveClusterApprovalRequests(updateRunName, stageName, approvalRequestNameFmt, stageTaskType string) { + Eventually(func() error { + appReqList := &placementv1beta1.ClusterApprovalRequestList{} + if err := hubClient.List(ctx, appReqList, client.MatchingLabels{ + placementv1beta1.TargetUpdatingStageNameLabel: stageName, + placementv1beta1.TargetUpdateRunLabel: updateRunName, + placementv1beta1.TaskTypeLabel: stageTaskType, + }); err != nil { + return fmt.Errorf("failed to list approval requests: %w", err) + } + + if len(appReqList.Items) != 1 { + return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) + } + appReq := &appReqList.Items[0] + approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) + if appReq.Name != approvalRequestName { + return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) + } + meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ + Status: metav1.ConditionTrue, + Type: 
string(placementv1beta1.ApprovalRequestConditionApproved), + ObservedGeneration: appReq.GetGeneration(), + Reason: "lgtm", + }) + return hubClient.Status().Update(ctx, appReq) + }, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") +} + func updateConfigMapSucceed(newConfigMap *corev1.ConfigMap) { cm := &corev1.ConfigMap{} key := client.ObjectKey{Namespace: newConfigMap.Namespace, Name: newConfigMap.Name} diff --git a/test/e2e/setup_test.go b/test/e2e/setup_test.go index 051c56758..e3c5af1c5 100644 --- a/test/e2e/setup_test.go +++ b/test/e2e/setup_test.go @@ -270,13 +270,6 @@ var ( ignoreClusterNameField, cmpopts.EquateEmpty(), } - - updateRunStatusCmpOption = cmp.Options{ - cmpopts.SortSlices(lessFuncCondition), - utils.IgnoreConditionLTTAndMessageFields, - cmpopts.IgnoreFields(placementv1beta1.StageUpdatingStatus{}, "StartTime", "EndTime"), - cmpopts.EquateEmpty(), - } ) // TestMain sets up the E2E test environment. diff --git a/test/e2e/staged_updaterun_test.go b/test/e2e/staged_updaterun_test.go index edc9707db..b6f80b928 100644 --- a/test/e2e/staged_updaterun_test.go +++ b/test/e2e/staged_updaterun_test.go @@ -24,6 +24,7 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" @@ -33,6 +34,7 @@ import ( placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" "github.com/kubefleet-dev/kubefleet/pkg/utils/condition" "github.com/kubefleet-dev/kubefleet/test/e2e/framework" + testutilsupdaterun "github.com/kubefleet-dev/kubefleet/test/utils/updaterun" ) // Note that this container will run in parallel with other containers. 
@@ -134,7 +136,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -142,9 +144,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to all the members after approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -205,7 +207,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{resourceSnapshotIndex1st, resourceSnapshotIndex2nd, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { By("Verify that the configmap is not updated on member-cluster-1 and member-cluster-3") @@ -216,9 +218,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to all the members after 
approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) By("Verify that new the configmap is updated on all member clusters") for idx := range allMemberClusters { @@ -310,7 +312,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -318,9 +320,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to all the members after approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, 
[][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -381,7 +383,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{resourceSnapshotIndex1st, resourceSnapshotIndex2nd, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -393,9 +395,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) By("Verify that new the configmap is updated on all member clusters") for idx := range allMemberClusters { @@ -429,7 +431,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{resourceSnapshotIndex2nd, resourceSnapshotIndex1st, resourceSnapshotIndex2nd}, []bool{true, true, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envCanary, 
placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollback resources to prod stage until approved", func() { @@ -441,9 +443,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollback resources to member-cluster-1 and member-cluster-3 after approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) for idx := range allMemberClusters { configMapActual := configMapPlacedOnClusterActual(allMemberClusters[idx], &oldConfigMap) @@ -533,7 +535,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[:2], []string{"", resourceSnapshotIndex1st}, []bool{false, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -541,9 +543,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 after approval but not member-cluster-3 and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, 
{allMemberClusterNames[0]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -589,7 +591,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -597,9 +599,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-3 after approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -641,14 +643,14 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(appConfigMapIdentifiers(), resourceSnapshotIndex1st, false, []string{allMemberClusterNames[2]}, []string{resourceSnapshotIndex1st}, []bool{false}, nil, nil) Consistently(rpStatusUpdatedActual, 
consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should remove resources on member-cluster-1 and member-cluster-2 after approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) // need to go through two stages - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) Eventually(surSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[2]) checkIfRemovedConfigMapFromMemberClusters([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -734,7 +736,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[2:], []string{""}, []bool{false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -742,9 +744,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, 
placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[2]}) checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) @@ -789,7 +791,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{false, true, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 until approved", func() { @@ -797,9 +799,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 after approval and complete the staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -841,7 +843,7 @@ var _ = Describe("test RP rollout with 
staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(appConfigMapIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames[1:], []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{true, true}, nil, nil) Consistently(rpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not remove resources from member-cluster-1 until approved", func() { @@ -849,9 +851,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should remove resources on member-cluster-1 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) Eventually(surSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[2]) checkIfRemovedConfigMapFromMemberClusters([]*framework.Cluster{allMemberClusters[0]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[1], allMemberClusters[2]}) @@ -992,7 +994,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, wantROs) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 and member-cluster-3 until approved", func() { @@ -1000,9 +1002,9 @@ var _ = Describe("test 
RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, wantROs, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, wantROs, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1092,13 +1094,13 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should report diff for member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), 
"Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) }) @@ -1205,13 +1207,13 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update to the new configmap %s on cluster %s", newConfigMap.Name, allMemberClusterNames[1]) // Approval for AfterStageTask of canary stage - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) // Approval for BeforeStageTask of prod stage - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) // Verify complete rollout. - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) // Verify new configmap is on all member clusters. @@ -1304,7 +1306,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem It("Should complete the staged update run with all 3 clusters updated in parallel", func() { // With maxConcurrency=3, all 3 clusters should be updated in parallel. // Each round waits 15 seconds, so total time should be under 20s. 
- surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunParallelEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1394,7 +1396,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem // Since maxConcurrency=70% each round we process 2 clusters in parallel, // so all 3 clusters should be updated in 2 rounds. // Each round waits 15 seconds, so total time should be under 40s. - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunParallelEventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1475,14 +1477,14 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem checkIfRemovedConfigMapFromAllMemberClustersConsistently() By("Validating the sur status remains in Initialize state") - surNotStartedActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) + surNotStartedActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) Consistently(surNotStartedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to Initialize updateRun %s/%s ", testNamespace, updateRunNames[0]) }) It("Should rollout resources to member-cluster-2 only after update run is in Run state", func() { // Update the update run state to Run. 
By("Updating the update run state to Run") - UpdateStagedUpdateRunState(ctx, hubClient, updateRunNames[0], testNamespace, placementv1beta1.StateRun) + updateStagedUpdateRunState(updateRunNames[0], testNamespace, placementv1beta1.StateRun) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1491,7 +1493,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout to all member clusters while waiting for beforeStageTask approval for prod stage", func() { @@ -1507,7 +1509,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem It("Should not rollout to all member clusters after stopping update run", func() { // Update the update run state to Stop. By("Updating the update run state to Stop") - UpdateStagedUpdateRunState(ctx, hubClient, updateRunNames[0], testNamespace, placementv1beta1.StateStop) + updateStagedUpdateRunState(updateRunNames[0], testNamespace, placementv1beta1.StateStop) By("Validating not rolled out to member-cluster-1 and member-cluster-3 yet") checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1519,7 +1521,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should not complete rollout to all member after beforeStageTask approval while in Stop state", func() { - ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) By("Validating not rolled out to member-cluster-1 and member-cluster-3 after beforeStageTask approval while update run is in Stop state") checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1528,13 +1530,13 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem It("Should complete rollout to all member clusters after resuming the update run to Run state", func() { // Update the update run state back to Run. 
By("Updating the update run state back to Run") - UpdateStagedUpdateRunState(ctx, hubClient, updateRunNames[0], testNamespace, placementv1beta1.StateRun) + updateStagedUpdateRunState(updateRunNames[0], testNamespace, placementv1beta1.StateRun) By("All member clusters should have work resources placed") checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1], allMemberClusters[2]}) By("Validating update run has succeeded after resuming") - surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := testutilsupdaterun.StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1670,3 +1672,47 @@ func createStagedUpdateRunSucceedWithNoResourceSnapshotIndex(updateRunName, name } Expect(hubClient.Create(ctx, updateRun)).To(Succeed(), "Failed to create StagedUpdateRun %s", updateRunName) } + +func updateStagedUpdateRunState(updateRunName, namespace string, state placementv1beta1.State) { + Eventually(func() error { + updateRun := &placementv1beta1.StagedUpdateRun{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { + return fmt.Errorf("failed to get StagedUpdateRun %s", updateRunName) + } + + updateRun.Spec.State = state + if err := hubClient.Update(ctx, updateRun); err != nil { + return fmt.Errorf("failed to update StagedUpdateRun %s", updateRunName) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update StagedUpdateRun %s to state %s", updateRunName, state) +} + +func validateAndApproveNamespacedApprovalRequests(updateRunName, namespace, stageName, approvalRequestNameFmt, stageTaskType string) { + Eventually(func() error { + appReqList := &placementv1beta1.ApprovalRequestList{} + if err := hubClient.List(ctx, appReqList, client.InNamespace(namespace), client.MatchingLabels{ + placementv1beta1.TargetUpdatingStageNameLabel: stageName, + placementv1beta1.TargetUpdateRunLabel: updateRunName, + placementv1beta1.TaskTypeLabel: stageTaskType, + }); err != nil { + return fmt.Errorf("failed to list approval requests: %w", err) + } + + if len(appReqList.Items) != 1 { + return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) + } + appReq := &appReqList.Items[0] + approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) + if appReq.Name != approvalRequestName { + return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) + } + meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ + Status: metav1.ConditionTrue, + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + ObservedGeneration: appReq.GetGeneration(), + Reason: "lgtm", + }) 
+ return hubClient.Status().Update(ctx, appReq) + }, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") +} diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go index 1d6ddbabc..b23f45644 100644 --- a/test/e2e/utils_test.go +++ b/test/e2e/utils_test.go @@ -17,7 +17,6 @@ limitations under the License. package e2e import ( - "context" "encoding/json" "errors" "fmt" @@ -1833,186 +1832,3 @@ func retrievePlacement(placementKey types.NamespacedName) (placementv1beta1.Plac } return placement, nil } - -// ClusterStagedUpdateRunStatusSucceededActual verifies the status of the ClusterStagedUpdateRun. -func ClusterStagedUpdateRunStatusSucceededActual( - ctx context.Context, - hubClient client.Client, - updateRunName string, - wantResourceIndex string, - wantPolicyIndex string, - wantClusterCount int, - wantApplyStrategy *placementv1beta1.ApplyStrategy, - wantStrategySpec *placementv1beta1.UpdateStrategySpec, - wantSelectedClusters [][]string, - wantUnscheduledClusters []string, - wantCROs map[string][]string, - wantROs map[string][]placementv1beta1.NamespacedName, - execute bool, -) func() error { - return func() error { - updateRun := &placementv1beta1.ClusterStagedUpdateRun{} - if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { - return err - } - - wantStatus := placementv1beta1.UpdateRunStatus{ - PolicySnapshotIndexUsed: wantPolicyIndex, - ResourceSnapshotIndexUsed: wantResourceIndex, - PolicyObservedClusterCount: wantClusterCount, - ApplyStrategy: wantApplyStrategy.DeepCopy(), - UpdateStrategySnapshot: wantStrategySpec, - } - - if execute { - wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) - } else { - wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) - } - if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { - return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) - } - return nil - } -} - -// StagedUpdateRunStatusSucceededActual verifies the status of the StagedUpdateRun. 
-func StagedUpdateRunStatusSucceededActual( - ctx context.Context, - hubClient client.Client, - updateRunName, namespace string, - wantResourceIndex, wantPolicyIndex string, - wantClusterCount int, - wantApplyStrategy *placementv1beta1.ApplyStrategy, - wantStrategySpec *placementv1beta1.UpdateStrategySpec, - wantSelectedClusters [][]string, - wantUnscheduledClusters []string, - wantCROs map[string][]string, - wantROs map[string][]placementv1beta1.NamespacedName, - execute bool, -) func() error { - return func() error { - updateRun := &placementv1beta1.StagedUpdateRun{} - if err := hubClient.Get(ctx, client.ObjectKey{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { - return err - } - - wantStatus := placementv1beta1.UpdateRunStatus{ - PolicySnapshotIndexUsed: wantPolicyIndex, - ResourceSnapshotIndexUsed: wantResourceIndex, - PolicyObservedClusterCount: wantClusterCount, - ApplyStrategy: wantApplyStrategy.DeepCopy(), - UpdateStrategySnapshot: wantStrategySpec, - } - - if execute { - wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) - } else { - wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) - } - if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { - return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) - } - return nil - } -} - -// UpdateClusterStagedUpdateRunState updates the state of the ClusterStagedUpdateRun with the given name and state. -func UpdateClusterStagedUpdateRunState(ctx context.Context, hubClient client.Client, updateRunName string, state placementv1beta1.State) { - Eventually(func() error { - updateRun := &placementv1beta1.ClusterStagedUpdateRun{} - if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { - return fmt.Errorf("failed to get ClusterStagedUpdateRun %s", updateRunName) - } - - updateRun.Spec.State = state - if err := hubClient.Update(ctx, updateRun); err != nil { - return fmt.Errorf("failed to update ClusterStagedUpdateRun %s", updateRunName) - } - return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update ClusterStagedUpdateRun %s state to %s", updateRunName, state) -} - -// ValidateAndApproveClusterApprovalRequests validates and approves the cluster approval request for the given update run, stage based on stage task type. 
-func ValidateAndApproveClusterApprovalRequests(ctx context.Context, hubClient client.Client, updateRunName, stageName, approvalRequestNameFmt, stageTaskType string) { - Eventually(func() error { - appReqList := &placementv1beta1.ClusterApprovalRequestList{} - if err := hubClient.List(ctx, appReqList, client.MatchingLabels{ - placementv1beta1.TargetUpdatingStageNameLabel: stageName, - placementv1beta1.TargetUpdateRunLabel: updateRunName, - placementv1beta1.TaskTypeLabel: stageTaskType, - }); err != nil { - return fmt.Errorf("failed to list approval requests: %w", err) - } - - if len(appReqList.Items) != 1 { - return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) - } - appReq := &appReqList.Items[0] - approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) - if appReq.Name != approvalRequestName { - return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) - } - meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ - Status: metav1.ConditionTrue, - Type: string(placementv1beta1.ApprovalRequestConditionApproved), - ObservedGeneration: appReq.GetGeneration(), - Reason: "lgtm", - }) - return hubClient.Status().Update(ctx, appReq) - }, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") -} - -// UpdateStagedUpdateRunState updates the state of the StagedUpdateRun with the given name, namespace and state. -func UpdateStagedUpdateRunState(ctx context.Context, hubClient client.Client, updateRunName, namespace string, state placementv1beta1.State) { - Eventually(func() error { - updateRun := &placementv1beta1.StagedUpdateRun{} - if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { - return fmt.Errorf("failed to get StagedUpdateRun %s", updateRunName) - } - - updateRun.Spec.State = state - if err := hubClient.Update(ctx, updateRun); err != nil { - return fmt.Errorf("failed to update StagedUpdateRun %s", updateRunName) - } - return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update StagedUpdateRun %s to state %s", updateRunName, state) -} - -// ValidateAndApproveNamespacedApprovalRequests validates and approves the approval request for the given update run, stage based on stage task type. 
-func ValidateAndApproveNamespacedApprovalRequests(ctx context.Context, hubClient client.Client, updateRunName, namespace, stageName, approvalRequestNameFmt, stageTaskType string) { - Eventually(func() error { - appReqList := &placementv1beta1.ApprovalRequestList{} - if err := hubClient.List(ctx, appReqList, client.InNamespace(namespace), client.MatchingLabels{ - placementv1beta1.TargetUpdatingStageNameLabel: stageName, - placementv1beta1.TargetUpdateRunLabel: updateRunName, - placementv1beta1.TaskTypeLabel: stageTaskType, - }); err != nil { - return fmt.Errorf("failed to list approval requests: %w", err) - } - - if len(appReqList.Items) != 1 { - return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) - } - appReq := &appReqList.Items[0] - approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) - if appReq.Name != approvalRequestName { - return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) - } - meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ - Status: metav1.ConditionTrue, - Type: string(placementv1beta1.ApprovalRequestConditionApproved), - ObservedGeneration: appReq.GetGeneration(), - Reason: "lgtm", - }) - return hubClient.Status().Update(ctx, appReq) - }, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") -} diff --git a/test/utils/updaterun/updaterun_status.go b/test/utils/updaterun/updaterun_status.go new file mode 100644 index 000000000..e3f33119e --- /dev/null +++ b/test/utils/updaterun/updaterun_status.go @@ -0,0 +1,326 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package updaterun + +import ( + "context" + "fmt" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" + "github.com/kubefleet-dev/kubefleet/pkg/utils" + "github.com/kubefleet-dev/kubefleet/pkg/utils/condition" +) + +var ( + lessFuncCondition = func(a, b metav1.Condition) bool { + return a.Type < b.Type + } + updateRunStatusCmpOption = cmp.Options{ + cmpopts.SortSlices(lessFuncCondition), + utils.IgnoreConditionLTTAndMessageFields, + cmpopts.IgnoreFields(placementv1beta1.StageUpdatingStatus{}, "StartTime", "EndTime"), + cmpopts.EquateEmpty(), + } +) + +// ClusterStagedUpdateRunStatusSucceededActual verifies the status of the ClusterStagedUpdateRun. 
+func ClusterStagedUpdateRunStatusSucceededActual( + ctx context.Context, + hubClient client.Client, + updateRunName string, + wantResourceIndex string, + wantPolicyIndex string, + wantClusterCount int, + wantApplyStrategy *placementv1beta1.ApplyStrategy, + wantStrategySpec *placementv1beta1.UpdateStrategySpec, + wantSelectedClusters [][]string, + wantUnscheduledClusters []string, + wantCROs map[string][]string, + wantROs map[string][]placementv1beta1.NamespacedName, + execute bool, +) func() error { + return func() error { + updateRun := &placementv1beta1.ClusterStagedUpdateRun{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { + return err + } + + wantStatus := placementv1beta1.UpdateRunStatus{ + PolicySnapshotIndexUsed: wantPolicyIndex, + ResourceSnapshotIndexUsed: wantResourceIndex, + PolicyObservedClusterCount: wantClusterCount, + ApplyStrategy: wantApplyStrategy.DeepCopy(), + UpdateStrategySnapshot: wantStrategySpec, + } + + if execute { + wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) + } else { + wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) + } + if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { + return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) + } + return nil + } +} + +// StagedUpdateRunStatusSucceededActual verifies the status of the StagedUpdateRun. 
+func StagedUpdateRunStatusSucceededActual( + ctx context.Context, + hubClient client.Client, + updateRunName, namespace string, + wantResourceIndex, wantPolicyIndex string, + wantClusterCount int, + wantApplyStrategy *placementv1beta1.ApplyStrategy, + wantStrategySpec *placementv1beta1.UpdateStrategySpec, + wantSelectedClusters [][]string, + wantUnscheduledClusters []string, + wantCROs map[string][]string, + wantROs map[string][]placementv1beta1.NamespacedName, + execute bool, +) func() error { + return func() error { + updateRun := &placementv1beta1.StagedUpdateRun{} + if err := hubClient.Get(ctx, client.ObjectKey{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { + return err + } + + wantStatus := placementv1beta1.UpdateRunStatus{ + PolicySnapshotIndexUsed: wantPolicyIndex, + ResourceSnapshotIndexUsed: wantResourceIndex, + PolicyObservedClusterCount: wantClusterCount, + ApplyStrategy: wantApplyStrategy.DeepCopy(), + UpdateStrategySnapshot: wantStrategySpec, + } + + if execute { + wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) + } else { + wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) + } + if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { + return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) + } + return nil + } +} + +func buildStageUpdatingStatusesForInitialized( + wantStrategySpec *placementv1beta1.UpdateStrategySpec, + wantSelectedClusters [][]string, + wantCROs map[string][]string, + wantROs map[string][]placementv1beta1.NamespacedName, + updateRun placementv1beta1.UpdateRunObj, +) []placementv1beta1.StageUpdatingStatus { + stagesStatus := make([]placementv1beta1.StageUpdatingStatus, len(wantStrategySpec.Stages)) + for i, stage := range wantStrategySpec.Stages { + stagesStatus[i].StageName = stage.Name + stagesStatus[i].Clusters = make([]placementv1beta1.ClusterUpdatingStatus, len(wantSelectedClusters[i])) + for j := range stagesStatus[i].Clusters { + stagesStatus[i].Clusters[j].ClusterName = wantSelectedClusters[i][j] + stagesStatus[i].Clusters[j].ClusterResourceOverrideSnapshots = wantCROs[wantSelectedClusters[i][j]] + stagesStatus[i].Clusters[j].ResourceOverrideSnapshots = wantROs[wantSelectedClusters[i][j]] + } + stagesStatus[i].BeforeStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.BeforeStageTasks)) + for j, task := range stage.BeforeStageTasks { + stagesStatus[i].BeforeStageTaskStatus[j].Type = task.Type + if task.Type == placementv1beta1.StageTaskTypeApproval { + stagesStatus[i].BeforeStageTaskStatus[j].ApprovalRequestName = fmt.Sprintf(placementv1beta1.BeforeStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) + } + } + stagesStatus[i].AfterStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.AfterStageTasks)) + for j, task := range stage.AfterStageTasks { + stagesStatus[i].AfterStageTaskStatus[j].Type = task.Type + if task.Type == placementv1beta1.StageTaskTypeApproval { + stagesStatus[i].AfterStageTaskStatus[j].ApprovalRequestName = 
fmt.Sprintf(placementv1beta1.AfterStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) + } + } + } + return stagesStatus +} + +func buildStageUpdatingStatuses( + wantStrategySpec *placementv1beta1.UpdateStrategySpec, + wantSelectedClusters [][]string, + wantCROs map[string][]string, + wantROs map[string][]placementv1beta1.NamespacedName, + updateRun placementv1beta1.UpdateRunObj, +) []placementv1beta1.StageUpdatingStatus { + stagesStatus := make([]placementv1beta1.StageUpdatingStatus, len(wantStrategySpec.Stages)) + for i, stage := range wantStrategySpec.Stages { + stagesStatus[i].StageName = stage.Name + stagesStatus[i].Clusters = make([]placementv1beta1.ClusterUpdatingStatus, len(wantSelectedClusters[i])) + for j := range stagesStatus[i].Clusters { + stagesStatus[i].Clusters[j].ClusterName = wantSelectedClusters[i][j] + stagesStatus[i].Clusters[j].ClusterResourceOverrideSnapshots = wantCROs[wantSelectedClusters[i][j]] + stagesStatus[i].Clusters[j].ResourceOverrideSnapshots = wantROs[wantSelectedClusters[i][j]] + stagesStatus[i].Clusters[j].Conditions = updateRunClusterRolloutSucceedConditions(updateRun.GetGeneration()) + } + stagesStatus[i].BeforeStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.BeforeStageTasks)) + for j, task := range stage.BeforeStageTasks { + stagesStatus[i].BeforeStageTaskStatus[j].Type = task.Type + if task.Type == placementv1beta1.StageTaskTypeApproval { + stagesStatus[i].BeforeStageTaskStatus[j].ApprovalRequestName = fmt.Sprintf(placementv1beta1.BeforeStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) + } + stagesStatus[i].BeforeStageTaskStatus[j].Conditions = updateRunStageTaskSucceedConditions(updateRun.GetGeneration(), task.Type) + } + stagesStatus[i].AfterStageTaskStatus = make([]placementv1beta1.StageTaskStatus, len(stage.AfterStageTasks)) + for j, task := range stage.AfterStageTasks { + stagesStatus[i].AfterStageTaskStatus[j].Type = task.Type + if task.Type == placementv1beta1.StageTaskTypeApproval { + stagesStatus[i].AfterStageTaskStatus[j].ApprovalRequestName = fmt.Sprintf(placementv1beta1.AfterStageApprovalTaskNameFmt, updateRun.GetName(), stage.Name) + } + stagesStatus[i].AfterStageTaskStatus[j].Conditions = updateRunStageTaskSucceedConditions(updateRun.GetGeneration(), task.Type) + } + stagesStatus[i].Conditions = updateRunStageRolloutSucceedConditions(updateRun.GetGeneration()) + } + return stagesStatus +} + +func buildDeletionStageStatus( + wantUnscheduledClusters []string, + updateRun placementv1beta1.UpdateRunObj, +) *placementv1beta1.StageUpdatingStatus { + deleteStageStatus := buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) + deleteStageStatus.Conditions = updateRunStageRolloutSucceedConditions(updateRun.GetGeneration()) + return deleteStageStatus +} + +func buildDeletionStatusWithoutConditions( + wantUnscheduledClusters []string, + updateRun placementv1beta1.UpdateRunObj, +) *placementv1beta1.StageUpdatingStatus { + deleteStageStatus := &placementv1beta1.StageUpdatingStatus{ + StageName: "kubernetes-fleet.io/deleteStage", + } + deleteStageStatus.Clusters = make([]placementv1beta1.ClusterUpdatingStatus, len(wantUnscheduledClusters)) + for i := range deleteStageStatus.Clusters { + deleteStageStatus.Clusters[i].ClusterName = wantUnscheduledClusters[i] + deleteStageStatus.Clusters[i].Conditions = updateRunClusterRolloutSucceedConditions(updateRun.GetGeneration()) + } + return deleteStageStatus +} + +func updateRunClusterRolloutSucceedConditions(generation int64) []metav1.Condition { + return 
[]metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + Reason: condition.ClusterUpdatingStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + Reason: condition.ClusterUpdatingSucceededReason, + ObservedGeneration: generation, + }, + } +} + +func updateRunStageRolloutSucceedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionFalse, + Reason: condition.StageUpdatingSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.StageUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + Reason: condition.StageUpdatingSucceededReason, + ObservedGeneration: generation, + }, + } +} + +func updateRunStageTaskSucceedConditions(generation int64, taskType placementv1beta1.StageTaskType) []metav1.Condition { + if taskType == placementv1beta1.StageTaskTypeApproval { + return []metav1.Condition{ + { + Type: string(placementv1beta1.StageTaskConditionApprovalRequestCreated), + Status: metav1.ConditionTrue, + Reason: condition.StageTaskApprovalRequestCreatedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.StageTaskConditionApprovalRequestApproved), + Status: metav1.ConditionTrue, + Reason: condition.StageTaskApprovalRequestApprovedReason, + ObservedGeneration: generation, + }, + } + } + return []metav1.Condition{ + { + Type: string(placementv1beta1.StageTaskConditionWaitTimeElapsed), + Status: metav1.ConditionTrue, + Reason: condition.AfterStageTaskWaitTimeElapsedReason, + ObservedGeneration: generation, + }, + } +} + +func updateRunSucceedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + Reason: condition.UpdateRunInitializeSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionFalse, + Reason: condition.UpdateRunSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionSucceeded), + Status: metav1.ConditionTrue, + Reason: condition.UpdateRunSucceededReason, + ObservedGeneration: generation, + }, + } +} + +func updateRunInitializedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + Reason: condition.UpdateRunInitializeSucceededReason, + ObservedGeneration: generation, + }, + } +} From 2a197fd52c3380a39efed2d92215bbd2b3058177 Mon Sep 17 00:00:00 2001 From: Britania Rodriguez Reyes <145056127+britaniar@users.noreply.github.com> Date: Tue, 20 Jan 2026 09:45:50 -0800 Subject: [PATCH 5/5] fix: add state and remove generation labels from update run metrics (#389) --- pkg/controllers/updaterun/controller.go | 9 +- .../updaterun/controller_integration_test.go | 44 +++++---- .../updaterun/execution_integration_test.go | 97 +++++++++---------- .../initialization_integration_test.go | 49 +++++----- .../updaterun/stop_integration_test.go | 50 ++++------ .../updaterun/validation_integration_test.go | 12 +-- pkg/metrics/hub/metrics.go | 2 +- 7 files changed, 123 insertions(+), 140 deletions(-) diff --git 
a/pkg/controllers/updaterun/controller.go b/pkg/controllers/updaterun/controller.go index c65883a4c..befdc3831 100644 --- a/pkg/controllers/updaterun/controller.go +++ b/pkg/controllers/updaterun/controller.go @@ -21,7 +21,6 @@ import ( "context" "errors" "fmt" - "strconv" "time" "github.com/prometheus/client_golang/prometheus" @@ -489,26 +488,26 @@ func handleApprovalRequestDelete(obj client.Object, q workqueue.TypedRateLimitin // emitUpdateRunStatusMetric emits the update run status metric based on status conditions in the updateRun. func emitUpdateRunStatusMetric(updateRun placementv1beta1.UpdateRunObj) { generation := updateRun.GetGeneration() - genStr := strconv.FormatInt(generation, 10) + state := updateRun.GetUpdateRunSpec().State updateRunStatus := updateRun.GetUpdateRunStatus() succeedCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionSucceeded)) if succeedCond != nil && succeedCond.ObservedGeneration == generation { - hubmetrics.FleetUpdateRunStatusLastTimestampSeconds.WithLabelValues(updateRun.GetNamespace(), updateRun.GetName(), genStr, + hubmetrics.FleetUpdateRunStatusLastTimestampSeconds.WithLabelValues(updateRun.GetNamespace(), updateRun.GetName(), string(state), string(placementv1beta1.StagedUpdateRunConditionSucceeded), string(succeedCond.Status), succeedCond.Reason).SetToCurrentTime() return } progressingCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionProgressing)) if progressingCond != nil && progressingCond.ObservedGeneration == generation { - hubmetrics.FleetUpdateRunStatusLastTimestampSeconds.WithLabelValues(updateRun.GetNamespace(), updateRun.GetName(), genStr, + hubmetrics.FleetUpdateRunStatusLastTimestampSeconds.WithLabelValues(updateRun.GetNamespace(), updateRun.GetName(), string(state), string(placementv1beta1.StagedUpdateRunConditionProgressing), string(progressingCond.Status), progressingCond.Reason).SetToCurrentTime() return } initializedCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionInitialized)) if initializedCond != nil && initializedCond.ObservedGeneration == generation { - hubmetrics.FleetUpdateRunStatusLastTimestampSeconds.WithLabelValues(updateRun.GetNamespace(), updateRun.GetName(), genStr, + hubmetrics.FleetUpdateRunStatusLastTimestampSeconds.WithLabelValues(updateRun.GetNamespace(), updateRun.GetName(), string(state), string(placementv1beta1.StagedUpdateRunConditionInitialized), string(initializedCond.Status), initializedCond.Reason).SetToCurrentTime() return } diff --git a/pkg/controllers/updaterun/controller_integration_test.go b/pkg/controllers/updaterun/controller_integration_test.go index e20bd95a8..eb5db68a1 100644 --- a/pkg/controllers/updaterun/controller_integration_test.go +++ b/pkg/controllers/updaterun/controller_integration_test.go @@ -258,23 +258,27 @@ func validateUpdateRunMetricsEmitted(wantMetrics ...*prometheusclientmodel.Metri }, timeout, interval).Should(Succeed(), "failed to validate the update run status metrics") } +// generateMetricsLabels generates the labels for the update run status metrics. +// We pass the state explicitly instead of using updateRun.Spec.State because the metric +// should reflect the state at the time the condition occurred, which may be different from +// the current updateRun state if the updateRun has transitioned since then. 
func generateMetricsLabels( updateRun *placementv1beta1.ClusterStagedUpdateRun, - condition, status, reason string, + state, condition, status, reason string, ) []*prometheusclientmodel.LabelPair { return []*prometheusclientmodel.LabelPair{ {Name: ptr.To("namespace"), Value: &updateRun.Namespace}, {Name: ptr.To("name"), Value: &updateRun.Name}, - {Name: ptr.To("generation"), Value: ptr.To(strconv.FormatInt(updateRun.Generation, 10))}, + {Name: ptr.To("state"), Value: ptr.To(state)}, {Name: ptr.To("condition"), Value: ptr.To(condition)}, {Name: ptr.To("status"), Value: ptr.To(status)}, {Name: ptr.To("reason"), Value: ptr.To(reason)}, } } -func generateInitializationSucceededMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateInitializationSucceededMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionInitialized), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionInitialized), string(metav1.ConditionTrue), condition.UpdateRunInitializeSucceededReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -282,9 +286,9 @@ func generateInitializationSucceededMetric(updateRun *placementv1beta1.ClusterSt } } -func generateInitializationFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateInitializationFailedMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionInitialized), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionInitialized), string(metav1.ConditionFalse), condition.UpdateRunInitializeFailedReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -292,9 +296,9 @@ func generateInitializationFailedMetric(updateRun *placementv1beta1.ClusterStage } } -func generateProgressingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateProgressingMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionProgressing), string(metav1.ConditionTrue), condition.UpdateRunProgressingReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -302,9 +306,9 @@ func generateProgressingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRu } } -func generateWaitingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateWaitingMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing), + Label: generateMetricsLabels(updateRun, string(state), 
string(placementv1beta1.StagedUpdateRunConditionProgressing), string(metav1.ConditionFalse), condition.UpdateRunWaitingReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -312,9 +316,9 @@ func generateWaitingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) * } } -func generateStuckMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateStuckMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionProgressing), string(metav1.ConditionFalse), condition.UpdateRunStuckReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -322,9 +326,9 @@ func generateStuckMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *pr } } -func generateFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateFailedMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionSucceeded), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionSucceeded), string(metav1.ConditionFalse), condition.UpdateRunFailedReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -332,9 +336,9 @@ func generateFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *p } } -func generateStoppingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateStoppingMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionProgressing), string(metav1.ConditionUnknown), condition.UpdateRunStoppingReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -342,9 +346,9 @@ func generateStoppingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) } } -func generateStoppedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func generateStoppedMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionProgressing), string(metav1.ConditionFalse), condition.UpdateRunStoppedReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), @@ -352,9 +356,9 @@ func generateStoppedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) * } } -func generateSucceededMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { +func 
generateSucceededMetric(state placementv1beta1.State, updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ - Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionSucceeded), + Label: generateMetricsLabels(updateRun, string(state), string(placementv1beta1.StagedUpdateRunConditionSucceeded), string(metav1.ConditionTrue), condition.UpdateRunSucceededReason), Gauge: &prometheusclientmodel.Gauge{ Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), diff --git a/pkg/controllers/updaterun/execution_integration_test.go b/pkg/controllers/updaterun/execution_integration_test.go index 2c034e240..219c22fdb 100644 --- a/pkg/controllers/updaterun/execution_integration_test.go +++ b/pkg/controllers/updaterun/execution_integration_test.go @@ -24,7 +24,6 @@ import ( "github.com/google/go-cmp/cmp" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - promclient "github.com/prometheus/client_model/go" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -185,7 +184,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateApprovalRequestCreated(wantApprovalRequest) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should not start rolling out 1st stage", func() { @@ -197,7 +196,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).Should(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should accept the approval request and start to rollout 1st stage", func() { @@ -238,7 +237,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 2nd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -256,7 +255,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -274,7 +273,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + 
validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 4th cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -292,7 +291,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 5th cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -314,7 +313,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should complete the 1st stage after wait time passed and approval request approved and move on to the 2nd stage", func() { @@ -370,7 +369,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { Expect(approvalCreateTime.Before(waitEndTime)).Should(BeTrue()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should create approval request before 2nd stage", func() { @@ -393,7 +392,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateApprovalRequestCreated(wantApprovalRequest) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should not start rolling out 2nd stage", func() { @@ -405,7 +404,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { Expect(updateRun.Status.StagesStatus[1].StartTime).Should(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should accept the approval request and start to rollout 2nd stage", func() { @@ -448,7 +447,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), 
generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 2nd cluster in the 2nd stage as succeeded after marking the binding available", func() { @@ -466,7 +465,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 2nd stage as succeeded after marking the binding available", func() { @@ -484,7 +483,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 4th cluster in the 2nd stage as succeeded after marking the binding available", func() { @@ -502,7 +501,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 5th cluster in the 2nd stage as succeeded after marking the binding available", func() { @@ -523,7 +522,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should complete the 2nd stage after both after stage tasks are completed and move on to the delete stage", func() { @@ -586,7 +585,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { }, timeout, interval).Should(BeTrue(), "failed to validate the approvalRequest approval accepted") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should delete all the clusterResourceBindings in the delete stage and complete the update run", func() { @@ -618,7 +617,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun), generateSucceededMetric(updateRun)) + 
validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateSucceededMetric(placementv1beta1.StateRun, updateRun)) }) }) @@ -638,7 +637,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) AfterAll(func() { @@ -682,7 +681,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { It("Should emit stuck status metrics after time waiting for the 1st cluster reaches threshold", func() { By("Checking update run stuck metrics is emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun), generateStuckMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStuckMetric(placementv1beta1.StateRun, updateRun)) }) It("Should abort the execution if the binding has unexpected state", func() { @@ -703,7 +702,7 @@ var _ = Describe("UpdateRun execution tests - double stages", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun), generateStuckMetric(updateRun), generateFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStuckMetric(placementv1beta1.StateRun, updateRun), generateFailedMetric(placementv1beta1.StateRun, updateRun)) }) }) }) @@ -815,7 +814,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -836,7 +835,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 2nd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -854,7 +853,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding available and complete the updateRun", func() { @@ -883,7 +882,7 @@ var _ = Describe("UpdateRun execution 
tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].EndTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateSucceededMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateSucceededMetric(placementv1beta1.StateRun, updateRun)) }) }) @@ -909,7 +908,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -930,7 +929,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 2nd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -948,7 +947,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -967,7 +966,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should complete the 1st stage after the after stage task is completed and complete the updateRun", func() { @@ -995,7 +994,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { fmt.Sprintf("waitEndTime %v did not pass waitStartTime %v long enough, want at least %v", waitEndTime, waitStartTime, updateStrategy.Spec.Stages[0].AfterStageTasks[0].WaitTime.Duration)) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun), generateSucceededMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateSucceededMetric(placementv1beta1.StateRun, updateRun)) }) }) @@ -1018,7 +1017,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + 
validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -1039,7 +1038,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 2nd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -1057,7 +1056,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -1078,7 +1077,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should complete the 1st stage after approval request is approved and complete the updateRun", func() { @@ -1131,7 +1130,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { }, timeout, interval).Should(BeTrue(), "failed to validate the approvalRequest approval accepted") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateProgressingMetric(updateRun), generateSucceededMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateSucceededMetric(placementv1beta1.StateRun, updateRun)) }) }) @@ -1150,7 +1149,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding diff reported", func() { @@ -1171,7 +1170,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 2nd cluster in the 1st stage as succeeded after marking the binding diff reported", func() { @@ -1189,7 +1188,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, 
updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding diff reported and complete the updateRun", func() { @@ -1218,7 +1217,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].EndTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateSucceededMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateSucceededMetric(placementv1beta1.StateRun, updateRun)) }) }) @@ -1242,7 +1241,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) AfterAll(func() { @@ -1268,7 +1267,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { It("Should emit stuck status metrics after time waiting for the 1st cluster reaches threshold", func() { By("Checking update run stuck metrics is emitted") - validateUpdateRunMetricsEmitted(generateProgressingMetric(updateRun), generateStuckMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStuckMetric(placementv1beta1.StateRun, updateRun)) }) }) @@ -1333,7 +1332,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).Should(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should start the 1st stage after approval request is approved", func() { @@ -1527,7 +1526,6 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { }) Context("Cluster staged update run should update clusters one by one - different states (Initialize -> Run)", Ordered, func() { - var wantMetrics []*promclient.Metric BeforeAll(func() { By("Creating a new clusterStagedUpdateRun") updateRun.Spec.State = placementv1beta1.StateInitialize @@ -1538,8 +1536,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateInitializationSucceededMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should not start execution when the state is Initialize", func() { @@ -1569,8 +1566,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) 
+ validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -1591,7 +1587,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 2nd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -1609,7 +1605,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding available and complete the updateRun", func() { @@ -1638,8 +1634,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(updateRun.Status.StagesStatus[0].EndTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateSucceededMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateSucceededMetric(placementv1beta1.StateRun, updateRun)) }) }) }) diff --git a/pkg/controllers/updaterun/initialization_integration_test.go b/pkg/controllers/updaterun/initialization_integration_test.go index eca02f51c..ae0c980e9 100644 --- a/pkg/controllers/updaterun/initialization_integration_test.go +++ b/pkg/controllers/updaterun/initialization_integration_test.go @@ -69,6 +69,7 @@ var _ = Describe("Updaterun initialization tests", func() { updateRunNamespacedName = types.NamespacedName{Name: testUpdateRunName} updateRun = generateTestClusterStagedUpdateRun() + updateRun.Spec.State = placementv1beta1.StateInitialize crp = generateTestClusterResourcePlacement() updateStrategy = generateTestClusterStagedUpdateStrategy() clusterResourceOverride = generateTestClusterResourceOverride() @@ -136,7 +137,7 @@ var _ = Describe("Updaterun initialization tests", func() { Context("Test validateCRP", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should fail to initialize if CRP is not found", func() { @@ -188,7 +189,7 @@ var _ = Describe("Updaterun initialization tests", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, 
updateRun)) }) It("Should fail to initialize if the latest policy snapshot is not found", func() { @@ -298,7 +299,7 @@ var _ = Describe("Updaterun initialization tests", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should copy the latest policy snapshot details to the updateRun status -- pickFixed policy", func() { By("Creating scheduling policy snapshot with pickFixed policy") @@ -346,7 +347,7 @@ var _ = Describe("Updaterun initialization tests", func() { }) AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should copy the latest policy snapshot details to the updateRun status -- pickAll policy", func() { @@ -404,7 +405,7 @@ var _ = Describe("Updaterun initialization tests", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should fail to initialize if there is no selected or to-be-deleted cluster", func() { By("Creating a new clusterStagedUpdateRun") @@ -498,7 +499,7 @@ var _ = Describe("Updaterun initialization tests", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should not report error if there are only to-be-deleted clusters", func() { @@ -563,7 +564,7 @@ var _ = Describe("Updaterun initialization tests", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("fail the initialization if the clusterStagedUpdateStrategy is not found", func() { @@ -765,7 +766,7 @@ var _ = Describe("Updaterun initialization tests", func() { validateFailedInitCondition(ctx, updateRun, "invalid resource snapshot index `invalid-index` provided") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should fail to initialize if the specified resource snapshot index is invalid - negative integer", func() { @@ -777,7 +778,7 @@ var _ = Describe("Updaterun initialization tests", func() { validateFailedInitCondition(ctx, updateRun, "invalid resource snapshot index `-1` provided") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should fail to initialize if the specified resource snapshot is not found - no resourceSnapshots at all", func() { @@ -788,7 +789,7 @@ var _ = 
Describe("Updaterun initialization tests", func() { validateFailedInitCondition(ctx, updateRun, "no resourceSnapshots with index `0` found") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should NOT fail to initialize if the specified resource snapshot is not found when no resource index specified - no resourceSnapshots at all", func() { @@ -828,7 +829,7 @@ var _ = Describe("Updaterun initialization tests", func() { validateFailedInitCondition(ctx, updateRun, "no resourceSnapshots with index `0` found") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should fail to initialize if the specified resource snapshot is not found - no resource index label found", func() { @@ -843,7 +844,7 @@ var _ = Describe("Updaterun initialization tests", func() { validateFailedInitCondition(ctx, updateRun, "no resourceSnapshots with index `0` found") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should fail to initialize if the specified resource snapshot is not master snapshot", func() { @@ -858,7 +859,7 @@ var _ = Describe("Updaterun initialization tests", func() { validateFailedInitCondition(ctx, updateRun, "no master resourceSnapshot found for placement") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationFailedMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should select latest resource snapshot in the status when no resource index defined", func() { @@ -874,14 +875,14 @@ var _ = Describe("Updaterun initialization tests", func() { By("Validating the clusterStagedUpdateRun stats") initialized := generateSucceededInitializationStatus(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, clusterResourceOverride) - want := generateExecutionNotStartedStatus(updateRun, initialized) - validateClusterStagedUpdateRunStatus(ctx, updateRun, want, "") + validateClusterStagedUpdateRunStatus(ctx, updateRun, initialized, "") + Expect(updateRun.Status.ResourceSnapshotIndexUsed).To(Equal(testResourceSnapshotIndex), "resource snapshot index used mismatch in the updateRun status") By("Validating the clusterStagedUpdateRun initialized consistently") - validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, want, "") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, initialized, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should put related ClusterResourceOverrides in the status", func() { @@ -896,14 +897,13 @@ var _ = Describe("Updaterun initialization tests", func() { By("Validating the clusterStagedUpdateRun stats") initialized := generateSucceededInitializationStatus(crp, updateRun, testResourceSnapshotIndex, 
policySnapshot, updateStrategy, clusterResourceOverride) - want := generateExecutionNotStartedStatus(updateRun, initialized) - validateClusterStagedUpdateRunStatus(ctx, updateRun, want, "") + validateClusterStagedUpdateRunStatus(ctx, updateRun, initialized, "") By("Validating the clusterStagedUpdateRun initialized consistently") - validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, want, "") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, initialized, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun)) }) It("Should pick latest master resource snapshot if multiple snapshots", func() { @@ -931,14 +931,13 @@ var _ = Describe("Updaterun initialization tests", func() { By("Validating the clusterStagedUpdateRun status") initialized := generateSucceededInitializationStatus(crp, updateRun, "2", policySnapshot, updateStrategy, clusterResourceOverride) - want := generateExecutionNotStartedStatus(updateRun, initialized) - validateClusterStagedUpdateRunStatus(ctx, updateRun, want, "") + validateClusterStagedUpdateRunStatus(ctx, updateRun, initialized, "") By("Validating the clusterStagedUpdateRun initialized consistently") - validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, want, "") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, initialized, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateInitializationSucceededMetric(placementv1beta1.StateInitialize, updateRun)) }) }) }) diff --git a/pkg/controllers/updaterun/stop_integration_test.go b/pkg/controllers/updaterun/stop_integration_test.go index a6ed246cd..c6e926ed5 100644 --- a/pkg/controllers/updaterun/stop_integration_test.go +++ b/pkg/controllers/updaterun/stop_integration_test.go @@ -23,7 +23,6 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - promclient "github.com/prometheus/client_model/go" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -163,7 +162,6 @@ var _ = Describe("UpdateRun stop tests", func() { Context("Cluster staged update run should have stopped when state Stop", Ordered, func() { var wantApprovalRequest *placementv1beta1.ClusterApprovalRequest - var wantMetrics []*promclient.Metric BeforeAll(func() { // Add finalizer to one of the bindings for unscheduled cluster to test deletion stage later. binding := resourceBindings[numTargetClusters] // first unscheduled cluster @@ -198,8 +196,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateApprovalRequestCreated(wantApprovalRequest) By("Checking update run status metrics are emitted") - wantMetrics = []*promclient.Metric{generateWaitingMetric(updateRun)} - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should stop the update run in BeforeStageTask for 1st stage when state is Stop", func() { @@ -216,8 +213,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) 
+ validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun)) }) It("Should accept the approval request and not rollout 1st stage while in Stop state", func() { @@ -228,7 +224,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun)) }) It("Should start executing stage 1 of the update run when state is Run", func() { @@ -248,8 +244,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -270,7 +265,7 @@ var _ = Describe("UpdateRun stop tests", func() { Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should be stopping in the middle of cluster updating when update run state is Stop", func() { @@ -288,8 +283,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateStoppingMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun)) }) It("Should wait for cluster to finish updating so update run should still be stopping", func() { @@ -297,7 +291,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) 
+ validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun)) }) It("Should have completely stopped after the in-progress cluster has finished updating", func() { @@ -319,8 +313,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun)) By("Validating update run is in stopped state") validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") @@ -346,8 +339,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding available", func() { @@ -370,8 +362,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateWaitingMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should have approval request created for 1st stage AfterStageTask", func() { @@ -394,7 +385,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateApprovalRequestCreated(wantApprovalRequest) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should stop the update run in AfterStageTask for 1st stage when state is Stop", func() { @@ -411,8 +402,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) 
+ validateUpdateRunMetricsEmitted(generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun)) }) It("Should not continue to delete stage after approval when still stopped", func() { @@ -438,7 +428,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun)) }) It("Should complete the 1st stage once it starts running again when wait time passed and approval request approved then move on to the Delete stage", func() { @@ -487,8 +477,7 @@ var _ = Describe("UpdateRun stop tests", func() { Expect(approvalCreateTime.Before(waitEndTime)).Should(BeTrue()) By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun)) }) It("Should stop the update run in deletion stage when state is Stop", func() { @@ -505,8 +494,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateStoppingMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun)) }) It("Should not complete deletion stage when in progress clusters still deleting while stopped", func() { @@ -526,7 +514,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun)) }) It("Should stop completely after in-progress deletion is done when state is Stop", func() { @@ -569,8 +557,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) 
+ validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun)) }) It("Should complete delete stage and complete the update run when state is Run", func() { @@ -611,8 +598,7 @@ var _ = Describe("UpdateRun stop tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - wantMetrics = append(wantMetrics, generateSucceededMetric(updateRun)) - validateUpdateRunMetricsEmitted(wantMetrics...) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateProgressingMetric(placementv1beta1.StateRun, updateRun), generateStoppingMetric(placementv1beta1.StateStop, updateRun), generateStoppedMetric(placementv1beta1.StateStop, updateRun), generateSucceededMetric(placementv1beta1.StateRun, updateRun)) }) }) }) diff --git a/pkg/controllers/updaterun/validation_integration_test.go b/pkg/controllers/updaterun/validation_integration_test.go index b018a43ce..12ee2438f 100644 --- a/pkg/controllers/updaterun/validation_integration_test.go +++ b/pkg/controllers/updaterun/validation_integration_test.go @@ -165,7 +165,7 @@ var _ = Describe("UpdateRun validation tests", func() { Context("Test validateCRP", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateFailedMetric(placementv1beta1.StateRun, updateRun)) }) It("Should fail to validate if the CRP is not found", func() { @@ -212,7 +212,7 @@ var _ = Describe("UpdateRun validation tests", func() { validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "no latest policy snapshot associated") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateFailedMetric(placementv1beta1.StateRun, updateRun)) }) It("Should fail to validate if the latest policySnapshot has changed", func() { @@ -241,7 +241,7 @@ var _ = Describe("UpdateRun validation tests", func() { Expect(k8sClient.Delete(ctx, newPolicySnapshot)).Should(Succeed()) By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateFailedMetric(placementv1beta1.StateRun, updateRun)) }) It("Should fail to validate if the cluster count has changed", func() { @@ -255,14 +255,14 @@ var _ = Describe("UpdateRun validation tests", func() { "the cluster count initialized in the updateRun is outdated") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateFailedMetric(placementv1beta1.StateRun, updateRun)) }) }) Context("Test validateStagesStatus", func() { AfterEach(func() { By("Checking update run status metrics are emitted") - 
validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun), generateFailedMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun), generateFailedMetric(placementv1beta1.StateRun, updateRun)) }) It("Should fail to validate if the UpdateStrategySnapshot is nil", func() { @@ -511,7 +511,7 @@ var _ = Describe("UpdateRun validation tests", func() { validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") - validateUpdateRunMetricsEmitted(generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(generateWaitingMetric(placementv1beta1.StateRun, updateRun)) }) }) }) diff --git a/pkg/metrics/hub/metrics.go b/pkg/metrics/hub/metrics.go index 353cf99ea..ade482161 100644 --- a/pkg/metrics/hub/metrics.go +++ b/pkg/metrics/hub/metrics.go @@ -41,7 +41,7 @@ var ( FleetUpdateRunStatusLastTimestampSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "fleet_workload_update_run_status_last_timestamp_seconds", Help: "Last update timestamp of update run status in seconds", - }, []string{"namespace", "name", "generation", "condition", "status", "reason"}) + }, []string{"namespace", "name", "state", "condition", "status", "reason"}) ) // The scheduler related metrics.
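For reference, the last hunk above changes the third gauge label from "generation" to "state". Below is a minimal, self-contained Go sketch of how a sample would be emitted under the new label set using the prometheus client_golang API; the helper name, label values, and standalone main function are illustrative assumptions and are not part of this patch.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// A sketch of the gauge with the label set from the hunk above:
// the "generation" label is replaced by "state".
var fleetUpdateRunStatusLastTimestampSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
	Name: "fleet_workload_update_run_status_last_timestamp_seconds",
	Help: "Last update timestamp of update run status in seconds",
}, []string{"namespace", "name", "state", "condition", "status", "reason"})

// recordUpdateRunStatus is a hypothetical helper that stamps the gauge for one
// (state, condition, status, reason) combination of an update run.
func recordUpdateRunStatus(namespace, name, state, condition, status, reason string) {
	fleetUpdateRunStatusLastTimestampSeconds.
		WithLabelValues(namespace, name, state, condition, status, reason).
		SetToCurrentTime()
}

func main() {
	reg := prometheus.NewRegistry()
	reg.MustRegister(fleetUpdateRunStatusLastTimestampSeconds)

	// Placeholder label values for illustration only; the namespace is left
	// empty here, as it would be for a cluster-scoped update run.
	recordUpdateRunStatus("", "example-run", "Run", "Progressing", "True", "UpdateRunStarted")

	// Gather and print the metric to show what the new label set looks like
	// on the wire.
	mfs, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range mfs {
		for _, m := range mf.GetMetric() {
			fmt.Println(mf.GetName(), m.GetLabel(), m.GetGauge().GetValue())
		}
	}
}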