From 7a220d558d48734a5b3f73a6fb27521e7197e18a Mon Sep 17 00:00:00 2001 From: Salvatore Dario Minonne Date: Tue, 5 May 2026 10:13:18 +0200 Subject: [PATCH] fix(OCPBUGS-60093): propagate nodeSelector removal to control plane workloads When a user removes nodeSelector from the HostedControlPlane spec, the change was not propagated to control plane Deployments and StatefulSets. Two issues prevented this: 1. setDefaultOptions only assigned nodeSelector conditionally (when non-nil), so clearing it from the HCP had no effect on pod templates. 2. ApplyManifest uses DeepDerivative for change detection, which treats nil/empty maps as "don't care", silently skipping nodeSelector removal even when the desired state had no nodeSelector. This commit fixes both issues: - Merge HCP nodeSelector on top of the template's existing nodeSelector via maps.Copy in setDefaultOptions. This preserves entries from component YAML templates (e.g., kubernetes.io/os: linux used by 5 catalog and aws-node-termination-handler deployments) while applying HCP-specified entries on top. When HCP nodeSelector is nil, only the template entries remain, so removal of HCP-added entries is handled naturally because cpov2 loads templates fresh each reconciliation. - Add nodeSelector count tracking in ApplyManifest to force updates when nodeSelector entries are removed, matching the existing pattern for label removal detection. Only Deployment and StatefulSet are handled as they are the only workload kinds in the cpov2 framework. - Clone metadata maps in preserveOriginalMetadata to prevent aliasing between the existing and desired objects. This fixes a pre-existing bug where label/annotation value changes with the same key count were silently dropped because both objects shared the same underlying map. Co-Authored-By: Claude Opus 4.6 --- support/controlplane-component/defaults.go | 14 +- .../controlplane-component/defaults_test.go | 85 +++++ support/upsert/apply.go | 36 +- support/upsert/apply_test.go | 317 ++++++++++++++++++ 4 files changed, 446 insertions(+), 6 deletions(-) diff --git a/support/controlplane-component/defaults.go b/support/controlplane-component/defaults.go index 5bebe7d8822..cc934a5798e 100644 --- a/support/controlplane-component/defaults.go +++ b/support/controlplane-component/defaults.go @@ -165,10 +165,18 @@ func (c *controlPlaneWorkload[T]) setDefaultOptions(cpContext ControlPlaneContex // set PriorityClassName podTemplateSpec.Spec.PriorityClassName = priorityClass(c.Name(), hcp) - // setNodeSelector sets a nodeSelector passed through the API. - // This is useful to e.g ensure control plane pods land in management cluster Infra Nodes. + // Merge HCP nodeSelector on top of the template's existing nodeSelector. + // This preserves entries from component YAML templates (e.g., kubernetes.io/os: linux) + // while allowing the HCP spec to add additional selectors. When hcp.Spec.NodeSelector + // is nil, only the template entries remain, so removing nodeSelector from the HCP + // correctly removes HCP-added entries without wiping template-defined entries. + // Removal is detected by ApplyManifest via getNodeSelectorCount, because the template + // is loaded fresh each reconciliation. if hcp.Spec.NodeSelector != nil { - podTemplateSpec.Spec.NodeSelector = hcp.Spec.NodeSelector + if podTemplateSpec.Spec.NodeSelector == nil { + podTemplateSpec.Spec.NodeSelector = make(map[string]string) + } + maps.Copy(podTemplateSpec.Spec.NodeSelector, hcp.Spec.NodeSelector) } c.setLabels(podTemplateSpec, hcp) diff --git a/support/controlplane-component/defaults_test.go b/support/controlplane-component/defaults_test.go index f1c726fe2af..621795c42a6 100644 --- a/support/controlplane-component/defaults_test.go +++ b/support/controlplane-component/defaults_test.go @@ -310,3 +310,88 @@ func TestSetDefaultOptions(t *testing.T) { g.Expect(workloadObject.Spec.Template.Spec.SecurityContext.RunAsUser).To(Equal(ptr.To(int64(1002)))) g.Expect(workloadObject.Spec.Template.Spec.SecurityContext.FSGroup).To(Equal(ptr.To(int64(1002)))) } + +func TestSetDefaultOptionsNodeSelector(t *testing.T) { + scheme := runtime.NewScheme() + _ = hyperv1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + _ = appsv1.AddToScheme(scheme) + + tests := []struct { + name string + hcpNodeSelector map[string]string + templateNodeSelector map[string]string + expectedNodeSelector map[string]string + }{ + { + name: "When HCP nodeSelector is set and template has no nodeSelector, it should apply HCP nodeSelector", + hcpNodeSelector: map[string]string{"node-role": "infra"}, + expectedNodeSelector: map[string]string{"node-role": "infra"}, + }, + { + name: "When HCP nodeSelector is nil and template has a nodeSelector, it should preserve the template nodeSelector", + hcpNodeSelector: nil, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + }, + { + name: "When HCP nodeSelector is nil and template has no nodeSelector, it should remain nil", + hcpNodeSelector: nil, + templateNodeSelector: nil, + expectedNodeSelector: nil, + }, + { + name: "When HCP nodeSelector is an empty map and template has a nodeSelector, it should preserve the template nodeSelector", + hcpNodeSelector: map[string]string{}, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + }, + { + name: "When both HCP and template have nodeSelectors, it should merge them with HCP entries added", + hcpNodeSelector: map[string]string{"node-role": "infra"}, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux", "node-role": "infra"}, + }, + { + name: "When HCP and template have overlapping keys, HCP value should take precedence", + hcpNodeSelector: map[string]string{"kubernetes.io/os": "windows"}, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "windows"}, + }, + { + name: "When HCP nodeSelector is removed after being set, template nodeSelector should be preserved", + hcpNodeSelector: nil, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewGomegaWithT(t) + cpw := &controlPlaneWorkload[*appsv1.Deployment]{ + name: "test-component", + workloadProvider: &deploymentProvider{}, + ComponentOptions: &testComponent{}, + } + dep := &appsv1.Deployment{} + dep.Spec.Template.Spec.NodeSelector = tt.templateNodeSelector + + hcp := &hyperv1.HostedControlPlane{ + Spec: hyperv1.HostedControlPlaneSpec{ + NodeSelector: tt.hcpNodeSelector, + }, + } + err := cpw.setDefaultOptions(ControlPlaneContext{ + HCP: hcp, + Client: fake.NewClientBuilder().WithScheme(scheme).Build(), + }, dep, nil) + g.Expect(err).ToNot(HaveOccurred()) + if tt.expectedNodeSelector == nil { + g.Expect(dep.Spec.Template.Spec.NodeSelector).To(BeNil()) + } else { + g.Expect(dep.Spec.Template.Spec.NodeSelector).To(Equal(tt.expectedNodeSelector)) + } + }) + } +} diff --git a/support/upsert/apply.go b/support/upsert/apply.go index 5364cf67441..eb68a578b91 100644 --- a/support/upsert/apply.go +++ b/support/upsert/apply.go @@ -104,9 +104,11 @@ func (p *applyProvider) ApplyManifest(ctx context.Context, c crclient.Client, ob func (p *applyProvider) update(ctx context.Context, c crclient.Client, obj crclient.Object, existing crclient.Object) (controllerutil.OperationResult, error) { key := crclient.ObjectKeyFromObject(obj) - // Save original labels before preserveOriginalMetadata modifies them + // Save original labels and nodeSelector count before preserveOriginalMetadata + // modifies them, so removal detection compares against the true originals. originalLabels := existing.GetLabels() originalLabelCount := len(originalLabels) + originalNodeSelectorCount := getNodeSelectorCount(existing) switch existingTyped := existing.(type) { case *corev1.ServiceAccount: @@ -141,6 +143,14 @@ func (p *applyProvider) update(ctx context.Context, c crclient.Client, obj crcli isEqual = false } + // Special handling for nodeSelector removal: DeepDerivative treats nil/empty + // nodeSelector as "don't care", so removing nodeSelector entries from a + // Deployment or StatefulSet would otherwise be silently ignored. + mutatedNodeSelectorCount := getNodeSelectorCount(obj) + if mutatedNodeSelectorCount < originalNodeSelectorCount { + isEqual = false + } + // If objects are equal (no changes needed), record no-op update and return early if isEqual { if p.loopDetector != nil { @@ -192,7 +202,11 @@ func cleanupRemovalMarkers(obj crclient.Object) { } func preserveOriginalMetadata(original, mutated crclient.Object) { - labels := original.GetLabels() + // Clone the maps so we don't mutate the original object's metadata. + // Without cloning, the comparison in update() between 'existing' (serialized + // after this function runs) and 'obj' would see identical labels/annotations, + // hiding value changes when the key count stays the same. + labels := maps.Clone(original.GetLabels()) if labels == nil { labels = make(map[string]string) } @@ -207,7 +221,7 @@ func preserveOriginalMetadata(original, mutated crclient.Object) { } mutated.SetLabels(labels) - annotations := original.GetAnnotations() + annotations := maps.Clone(original.GetAnnotations()) if annotations == nil { annotations = make(map[string]string) } @@ -242,6 +256,22 @@ var ( } ) +// getNodeSelectorCount returns the number of nodeSelector entries for +// Deployment and StatefulSet pod templates, or 0 for other types. +// Only these two types are handled because they are the only workload kinds +// managed by the cpov2 (control-plane-component) framework. Other kinds like +// DaemonSet or Job are not used as control plane workloads. +func getNodeSelectorCount(obj crclient.Object) int { + switch typed := obj.(type) { + case *appsv1.Deployment: + return len(typed.Spec.Template.Spec.NodeSelector) + case *appsv1.StatefulSet: + return len(typed.Spec.Template.Spec.NodeSelector) + default: + return 0 + } +} + func toUnstructured(obj crclient.Object) (map[string]any, error) { // Create a copy of the original object as well as converting that copy to // unstructured data. diff --git a/support/upsert/apply_test.go b/support/upsert/apply_test.go index e3a5b54940a..271a1fc5ef0 100644 --- a/support/upsert/apply_test.go +++ b/support/upsert/apply_test.go @@ -4,6 +4,8 @@ import ( "testing" "time" + . "github.com/onsi/gomega" + "github.com/openshift/hypershift/support/api" "github.com/openshift/hypershift/support/netutil" @@ -254,6 +256,321 @@ func TestApplyManifestLabelRemovalWithEmptyLabels(t *testing.T) { } } +// TestApplyManifestNodeSelectorRemoval proves that removing nodeSelector from a +// Deployment or StatefulSet triggers an update even though DeepDerivative treats +// nil/empty nodeSelector as "don't care". Without the getNodeSelectorCount check +// in update(), this removal would be silently ignored, leaving pods Pending on +// nodes that no longer exist. +func TestApplyManifestNodeSelectorRemoval(t *testing.T) { + t.Run("When nodeSelector is removed from a Deployment, it should trigger an update and clear the nodeSelector", func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(BeEmpty()) + }) + + t.Run("When nodeSelector is removed from a StatefulSet, it should trigger an update and clear the nodeSelector", func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + existingStatefulSet := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + desiredStatefulSet := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingStatefulSet).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredStatefulSet) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.StatefulSet + err = c.Get(t.Context(), types.NamespacedName{Name: "test-sts", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(BeEmpty()) + }) + + t.Run("When nodeSelector is partially removed from a Deployment, it should trigger an update and keep only the remaining entries", func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + "topology-zone": "us-east-1a", + }, + }, + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(Equal(map[string]string{ + "node-role": "infra", + })) + }) +} + +// TestApplyManifestLabelValueChange proves that maps.Clone in preserveOriginalMetadata +// prevents the existing object's labels from being mutated in-place. Without the clone, +// changing a label value (same key count) would be invisible to DeepDerivative because +// both existing and desired would share the same underlying map. +func TestApplyManifestLabelValueChange(t *testing.T) { + t.Run("When a label value changes without key count change, it should trigger an update", func(t *testing.T) { + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + "version": "v1", + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + "version": "v2", + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Labels["version"]).To(Equal("v2")) + }) +} + +// TestApplyManifestAnnotationValueChange proves that maps.Clone in preserveOriginalMetadata +// prevents the existing object's annotations from being mutated in-place. Without the clone, +// changing an annotation value (same key count) would be invisible to DeepDerivative because +// both existing and desired would share the same underlying map. +func TestApplyManifestAnnotationValueChange(t *testing.T) { + t.Run("When an annotation value changes without key count change, it should trigger an update", func(t *testing.T) { + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Annotations: map[string]string{ + "config": "old-value", + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Annotations: map[string]string{ + "config": "new-value", + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Annotations["config"]).To(Equal("new-value")) + }) +} + +// TestApplyManifestNodeSelectorValueChange proves that changing a nodeSelector value +// (with constant key count) is detected via DeepDerivative, not getNodeSelectorCount. +// This path is different from nodeSelector removal and relies on preserveOriginalMetadata +// not mutating the existing object's pod template. +func TestApplyManifestNodeSelectorValueChange(t *testing.T) { + t.Run("When a nodeSelector value changes without key count change, it should trigger an update", func(t *testing.T) { + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "worker", + }, + }, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(Equal(map[string]string{ + "node-role": "worker", + })) + }) +} + // TestApplyManifestLabelRemovalOnCreate tests that removal markers are cleaned up // when creating a new object. This is critical because Kubernetes validates label // values during creation, and the removal marker value is not a valid label value.