diff --git a/support/controlplane-component/defaults.go b/support/controlplane-component/defaults.go index 5bebe7d8822..cc934a5798e 100644 --- a/support/controlplane-component/defaults.go +++ b/support/controlplane-component/defaults.go @@ -165,10 +165,18 @@ func (c *controlPlaneWorkload[T]) setDefaultOptions(cpContext ControlPlaneContex // set PriorityClassName podTemplateSpec.Spec.PriorityClassName = priorityClass(c.Name(), hcp) - // setNodeSelector sets a nodeSelector passed through the API. - // This is useful to e.g ensure control plane pods land in management cluster Infra Nodes. + // Merge HCP nodeSelector on top of the template's existing nodeSelector. + // This preserves entries from component YAML templates (e.g., kubernetes.io/os: linux) + // while allowing the HCP spec to add additional selectors. When hcp.Spec.NodeSelector + // is nil, only the template entries remain, so removing nodeSelector from the HCP + // correctly removes HCP-added entries without wiping template-defined entries. + // Removal is detected by ApplyManifest via getNodeSelectorCount, because the template + // is loaded fresh each reconciliation. if hcp.Spec.NodeSelector != nil { - podTemplateSpec.Spec.NodeSelector = hcp.Spec.NodeSelector + if podTemplateSpec.Spec.NodeSelector == nil { + podTemplateSpec.Spec.NodeSelector = make(map[string]string) + } + maps.Copy(podTemplateSpec.Spec.NodeSelector, hcp.Spec.NodeSelector) } c.setLabels(podTemplateSpec, hcp) diff --git a/support/controlplane-component/defaults_test.go b/support/controlplane-component/defaults_test.go index f1c726fe2af..621795c42a6 100644 --- a/support/controlplane-component/defaults_test.go +++ b/support/controlplane-component/defaults_test.go @@ -310,3 +310,88 @@ func TestSetDefaultOptions(t *testing.T) { g.Expect(workloadObject.Spec.Template.Spec.SecurityContext.RunAsUser).To(Equal(ptr.To(int64(1002)))) g.Expect(workloadObject.Spec.Template.Spec.SecurityContext.FSGroup).To(Equal(ptr.To(int64(1002)))) } + +func TestSetDefaultOptionsNodeSelector(t *testing.T) { + scheme := runtime.NewScheme() + _ = hyperv1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + _ = appsv1.AddToScheme(scheme) + + tests := []struct { + name string + hcpNodeSelector map[string]string + templateNodeSelector map[string]string + expectedNodeSelector map[string]string + }{ + { + name: "When HCP nodeSelector is set and template has no nodeSelector, it should apply HCP nodeSelector", + hcpNodeSelector: map[string]string{"node-role": "infra"}, + expectedNodeSelector: map[string]string{"node-role": "infra"}, + }, + { + name: "When HCP nodeSelector is nil and template has a nodeSelector, it should preserve the template nodeSelector", + hcpNodeSelector: nil, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + }, + { + name: "When HCP nodeSelector is nil and template has no nodeSelector, it should remain nil", + hcpNodeSelector: nil, + templateNodeSelector: nil, + expectedNodeSelector: nil, + }, + { + name: "When HCP nodeSelector is an empty map and template has a nodeSelector, it should preserve the template nodeSelector", + hcpNodeSelector: map[string]string{}, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + }, + { + name: "When both HCP and template have nodeSelectors, it should merge them with HCP entries added", + hcpNodeSelector: map[string]string{"node-role": "infra"}, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux", "node-role": "infra"}, + }, + { + name: "When HCP and template have overlapping keys, HCP value should take precedence", + hcpNodeSelector: map[string]string{"kubernetes.io/os": "windows"}, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "windows"}, + }, + { + name: "When HCP nodeSelector is removed after being set, template nodeSelector should be preserved", + hcpNodeSelector: nil, + templateNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + expectedNodeSelector: map[string]string{"kubernetes.io/os": "linux"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewGomegaWithT(t) + cpw := &controlPlaneWorkload[*appsv1.Deployment]{ + name: "test-component", + workloadProvider: &deploymentProvider{}, + ComponentOptions: &testComponent{}, + } + dep := &appsv1.Deployment{} + dep.Spec.Template.Spec.NodeSelector = tt.templateNodeSelector + + hcp := &hyperv1.HostedControlPlane{ + Spec: hyperv1.HostedControlPlaneSpec{ + NodeSelector: tt.hcpNodeSelector, + }, + } + err := cpw.setDefaultOptions(ControlPlaneContext{ + HCP: hcp, + Client: fake.NewClientBuilder().WithScheme(scheme).Build(), + }, dep, nil) + g.Expect(err).ToNot(HaveOccurred()) + if tt.expectedNodeSelector == nil { + g.Expect(dep.Spec.Template.Spec.NodeSelector).To(BeNil()) + } else { + g.Expect(dep.Spec.Template.Spec.NodeSelector).To(Equal(tt.expectedNodeSelector)) + } + }) + } +} diff --git a/support/upsert/apply.go b/support/upsert/apply.go index 5364cf67441..eb68a578b91 100644 --- a/support/upsert/apply.go +++ b/support/upsert/apply.go @@ -104,9 +104,11 @@ func (p *applyProvider) ApplyManifest(ctx context.Context, c crclient.Client, ob func (p *applyProvider) update(ctx context.Context, c crclient.Client, obj crclient.Object, existing crclient.Object) (controllerutil.OperationResult, error) { key := crclient.ObjectKeyFromObject(obj) - // Save original labels before preserveOriginalMetadata modifies them + // Save original labels and nodeSelector count before preserveOriginalMetadata + // modifies them, so removal detection compares against the true originals. originalLabels := existing.GetLabels() originalLabelCount := len(originalLabels) + originalNodeSelectorCount := getNodeSelectorCount(existing) switch existingTyped := existing.(type) { case *corev1.ServiceAccount: @@ -141,6 +143,14 @@ func (p *applyProvider) update(ctx context.Context, c crclient.Client, obj crcli isEqual = false } + // Special handling for nodeSelector removal: DeepDerivative treats nil/empty + // nodeSelector as "don't care", so removing nodeSelector entries from a + // Deployment or StatefulSet would otherwise be silently ignored. + mutatedNodeSelectorCount := getNodeSelectorCount(obj) + if mutatedNodeSelectorCount < originalNodeSelectorCount { + isEqual = false + } + // If objects are equal (no changes needed), record no-op update and return early if isEqual { if p.loopDetector != nil { @@ -192,7 +202,11 @@ func cleanupRemovalMarkers(obj crclient.Object) { } func preserveOriginalMetadata(original, mutated crclient.Object) { - labels := original.GetLabels() + // Clone the maps so we don't mutate the original object's metadata. + // Without cloning, the comparison in update() between 'existing' (serialized + // after this function runs) and 'obj' would see identical labels/annotations, + // hiding value changes when the key count stays the same. + labels := maps.Clone(original.GetLabels()) if labels == nil { labels = make(map[string]string) } @@ -207,7 +221,7 @@ func preserveOriginalMetadata(original, mutated crclient.Object) { } mutated.SetLabels(labels) - annotations := original.GetAnnotations() + annotations := maps.Clone(original.GetAnnotations()) if annotations == nil { annotations = make(map[string]string) } @@ -242,6 +256,22 @@ var ( } ) +// getNodeSelectorCount returns the number of nodeSelector entries for +// Deployment and StatefulSet pod templates, or 0 for other types. +// Only these two types are handled because they are the only workload kinds +// managed by the cpov2 (control-plane-component) framework. Other kinds like +// DaemonSet or Job are not used as control plane workloads. +func getNodeSelectorCount(obj crclient.Object) int { + switch typed := obj.(type) { + case *appsv1.Deployment: + return len(typed.Spec.Template.Spec.NodeSelector) + case *appsv1.StatefulSet: + return len(typed.Spec.Template.Spec.NodeSelector) + default: + return 0 + } +} + func toUnstructured(obj crclient.Object) (map[string]any, error) { // Create a copy of the original object as well as converting that copy to // unstructured data. diff --git a/support/upsert/apply_test.go b/support/upsert/apply_test.go index e3a5b54940a..271a1fc5ef0 100644 --- a/support/upsert/apply_test.go +++ b/support/upsert/apply_test.go @@ -4,6 +4,8 @@ import ( "testing" "time" + . "github.com/onsi/gomega" + "github.com/openshift/hypershift/support/api" "github.com/openshift/hypershift/support/netutil" @@ -254,6 +256,321 @@ func TestApplyManifestLabelRemovalWithEmptyLabels(t *testing.T) { } } +// TestApplyManifestNodeSelectorRemoval proves that removing nodeSelector from a +// Deployment or StatefulSet triggers an update even though DeepDerivative treats +// nil/empty nodeSelector as "don't care". Without the getNodeSelectorCount check +// in update(), this removal would be silently ignored, leaving pods Pending on +// nodes that no longer exist. +func TestApplyManifestNodeSelectorRemoval(t *testing.T) { + t.Run("When nodeSelector is removed from a Deployment, it should trigger an update and clear the nodeSelector", func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(BeEmpty()) + }) + + t.Run("When nodeSelector is removed from a StatefulSet, it should trigger an update and clear the nodeSelector", func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + existingStatefulSet := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + desiredStatefulSet := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingStatefulSet).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredStatefulSet) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.StatefulSet + err = c.Get(t.Context(), types.NamespacedName{Name: "test-sts", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(BeEmpty()) + }) + + t.Run("When nodeSelector is partially removed from a Deployment, it should trigger an update and keep only the remaining entries", func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + "topology-zone": "us-east-1a", + }, + }, + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(Equal(map[string]string{ + "node-role": "infra", + })) + }) +} + +// TestApplyManifestLabelValueChange proves that maps.Clone in preserveOriginalMetadata +// prevents the existing object's labels from being mutated in-place. Without the clone, +// changing a label value (same key count) would be invisible to DeepDerivative because +// both existing and desired would share the same underlying map. +func TestApplyManifestLabelValueChange(t *testing.T) { + t.Run("When a label value changes without key count change, it should trigger an update", func(t *testing.T) { + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + "version": "v1", + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + "version": "v2", + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Labels["version"]).To(Equal("v2")) + }) +} + +// TestApplyManifestAnnotationValueChange proves that maps.Clone in preserveOriginalMetadata +// prevents the existing object's annotations from being mutated in-place. Without the clone, +// changing an annotation value (same key count) would be invisible to DeepDerivative because +// both existing and desired would share the same underlying map. +func TestApplyManifestAnnotationValueChange(t *testing.T) { + t.Run("When an annotation value changes without key count change, it should trigger an update", func(t *testing.T) { + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Annotations: map[string]string{ + "config": "old-value", + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Annotations: map[string]string{ + "config": "new-value", + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Annotations["config"]).To(Equal("new-value")) + }) +} + +// TestApplyManifestNodeSelectorValueChange proves that changing a nodeSelector value +// (with constant key count) is detected via DeepDerivative, not getNodeSelectorCount. +// This path is different from nodeSelector removal and relies on preserveOriginalMetadata +// not mutating the existing object's pod template. +func TestApplyManifestNodeSelectorValueChange(t *testing.T) { + t.Run("When a nodeSelector value changes without key count change, it should trigger an update", func(t *testing.T) { + g := NewWithT(t) + + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "infra", + }, + }, + }, + }, + } + + desiredDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dep", + Namespace: "test-ns", + Labels: map[string]string{ + "app": "test", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role": "worker", + }, + }, + }, + }, + } + + c := fake.NewClientBuilder().WithObjects(existingDeployment).Build() + provider := &applyProvider{} + + result, err := provider.ApplyManifest(t.Context(), c, desiredDeployment) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(controllerutil.OperationResultUpdated)) + + var updated appsv1.Deployment + err = c.Get(t.Context(), types.NamespacedName{Name: "test-dep", Namespace: "test-ns"}, &updated) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updated.Spec.Template.Spec.NodeSelector).To(Equal(map[string]string{ + "node-role": "worker", + })) + }) +} + // TestApplyManifestLabelRemovalOnCreate tests that removal markers are cleaned up // when creating a new object. This is critical because Kubernetes validates label // values during creation, and the removal marker value is not a valid label value.