From caefe2aa076ea66e8497d0b5d6da1a5f4689d229 Mon Sep 17 00:00:00 2001 From: Max Cao Date: Tue, 12 May 2026 14:28:22 -0700 Subject: [PATCH 1/2] fix(hostedcluster): requeue when AutoNodeEnabled is progressing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HC reconciler reads ControlPlaneComponent status to determine AutoNodeEnabled but does not watch CPCs. Without a requeue, the condition stays stale until an unrelated resource triggers reconciliation — causing the e2e lifecycle test to time out waiting for the condition to flip to True. Return a progressing signal from reconcileAutoNodeEnabledCondition and set a 15s requeue so the condition converges promptly. Co-authored-by: Cursor Signed-off-by: Max Cao --- .../hostedcluster/hostedcluster_controller.go | 10 ++++++-- .../controllers/hostedcluster/karpenter.go | 22 ++++++++++------ .../hostedcluster/karpenter_test.go | 25 +++++++++++++------ 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go index eeb4378fed2..ec6f2e92af6 100644 --- a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go @@ -847,8 +847,8 @@ func (r *HostedClusterReconciler) reconcile(ctx context.Context, req ctrl.Reques propagateControlPlaneVersion(hcluster, hcp) // Set the AutoNodeEnabled condition reflecting both spec intent and actual component rollout progress. - meta.SetStatusCondition(&hcluster.Status.Conditions, - r.reconcileAutoNodeEnabledCondition(ctx, hcluster, controlPlaneNamespace.Name)) + autoNodeCondition, autoNodeProgressing := r.reconcileAutoNodeEnabledCondition(ctx, hcluster, controlPlaneNamespace.Name) + meta.SetStatusCondition(&hcluster.Status.Conditions, autoNodeCondition) // Copy the AWSDefaultSecurityGroupCreated condition from the hostedcontrolplane if hcluster.Spec.Platform.Type == hyperv1.AWSPlatform { @@ -2103,6 +2103,12 @@ func (r *HostedClusterReconciler) reconcile(ctx context.Context, req ctrl.Reques if requeueAfter != nil { result.RequeueAfter = *requeueAfter } + if autoNodeProgressing { + autoNodeRequeue := 15 * time.Second + if result.RequeueAfter == 0 || autoNodeRequeue < result.RequeueAfter { + result.RequeueAfter = autoNodeRequeue + } + } return result, nil } diff --git a/hypershift-operator/controllers/hostedcluster/karpenter.go b/hypershift-operator/controllers/hostedcluster/karpenter.go index ae71cbd7498..80765881d04 100644 --- a/hypershift-operator/controllers/hostedcluster/karpenter.go +++ b/hypershift-operator/controllers/hostedcluster/karpenter.go @@ -164,7 +164,13 @@ func isKASAvailable(ctx context.Context, cpNamespace string, c client.Client) (b // - True / AsExpected — Karpenter enabled in spec AND both components fully rolled out. // - False / AutoNodeProgressing — Enable or disable operation is in progress. // - False / AutoNodeNotConfigured — Karpenter not in spec AND no components present. -func (r *HostedClusterReconciler) reconcileAutoNodeEnabledCondition(ctx context.Context, hcluster *hyperv1.HostedCluster, hcpNamespace string) metav1.Condition { +// +// reconcileAutoNodeEnabledCondition returns the AutoNodeEnabled condition and whether +// the caller should requeue to poll for progress. 
The second return value is true when +// an enable or disable operation is still in flight; the HC reconciler does not watch +// ControlPlaneComponent resources, so a periodic requeue is needed to pick up status +// changes from the karpenter-operator's CPC updates. +func (r *HostedClusterReconciler) reconcileAutoNodeEnabledCondition(ctx context.Context, hcluster *hyperv1.HostedCluster, hcpNamespace string) (metav1.Condition, bool) { condition := metav1.Condition{ Type: string(hyperv1.AutoNodeEnabled), ObservedGeneration: hcluster.Generation, @@ -178,7 +184,7 @@ func (r *HostedClusterReconciler) reconcileAutoNodeEnabledCondition(ctx context. condition.Status = metav1.ConditionUnknown condition.Reason = hyperv1.AutoNodeEvaluationFailedReason condition.Message = fmt.Sprintf("failed to list ControlPlaneComponents: %v", err) - return condition + return condition, false } // Grab all of our karpenter components @@ -195,7 +201,7 @@ func (r *HostedClusterReconciler) reconcileAutoNodeEnabledCondition(ctx context. condition.Status = metav1.ConditionFalse condition.Reason = hyperv1.AutoNodeProgressingReason condition.Message = "AutoNode is being enabled: waiting for components to be created" - return condition + return condition, true } // Check if they're ready var notReady []string @@ -214,13 +220,13 @@ func (r *HostedClusterReconciler) reconcileAutoNodeEnabledCondition(ctx context. condition.Status = metav1.ConditionFalse condition.Reason = hyperv1.AutoNodeProgressingReason condition.Message = fmt.Sprintf("AutoNode is being enabled: %s", strings.Join(notReady, "; ")) - return condition + return condition, true } // Otherwise report ready condition.Status = metav1.ConditionTrue condition.Reason = hyperv1.AsExpectedReason condition.Message = "AutoNode is ready" - return condition + return condition, false } // Karpenter not enabled — check if Deployments are still terminating. @@ -236,7 +242,7 @@ func (r *HostedClusterReconciler) reconcileAutoNodeEnabledCondition(ctx context. condition.Status = metav1.ConditionUnknown condition.Reason = hyperv1.AutoNodeEvaluationFailedReason condition.Message = fmt.Sprintf("failed to check karpenter deployments: %v", err) - return condition + return condition, false } } @@ -244,11 +250,11 @@ func (r *HostedClusterReconciler) reconcileAutoNodeEnabledCondition(ctx context. 
 		condition.Status = metav1.ConditionFalse
 		condition.Reason = hyperv1.AutoNodeProgressingReason
 		condition.Message = fmt.Sprintf("AutoNode is being disabled: waiting for deployments to be removed: %s", strings.Join(runningDeployments, ", "))
-		return condition
+		return condition, true
 	}
 
 	condition.Status = metav1.ConditionFalse
 	condition.Reason = hyperv1.AutoNodeNotConfiguredReason
 	condition.Message = "AutoNode provisioner is not configured"
-	return condition
+	return condition, false
 }
diff --git a/hypershift-operator/controllers/hostedcluster/karpenter_test.go b/hypershift-operator/controllers/hostedcluster/karpenter_test.go
index 530293370dc..5649e7b9a84 100644
--- a/hypershift-operator/controllers/hostedcluster/karpenter_test.go
+++ b/hypershift-operator/controllers/hostedcluster/karpenter_test.go
@@ -297,14 +297,16 @@ func TestReconcileAutoNodeEnabledCondition(t *testing.T) {
 	}
 
 	tests := map[string]struct {
-		autoNode    hyperv1.AutoNode
-		components  []hyperv1.ControlPlaneComponent
-		deployments []appsv1.Deployment
-		want        metav1.Condition
+		autoNode        hyperv1.AutoNode
+		components      []hyperv1.ControlPlaneComponent
+		deployments     []appsv1.Deployment
+		want            metav1.Condition
+		wantProgressing bool
 	}{
 		"When karpenter is enabled and components not yet created it should report progressing": {
-			autoNode:   karpenterEnabledAutoNode,
-			components: nil,
+			autoNode:        karpenterEnabledAutoNode,
+			components:      nil,
+			wantProgressing: true,
 			want: metav1.Condition{
 				Type:   string(hyperv1.AutoNodeEnabled),
 				Status: metav1.ConditionFalse,
@@ -319,6 +321,7 @@ func TestReconcileAutoNodeEnabledCondition(t *testing.T) {
 					Status: hyperv1.ControlPlaneComponentStatus{Conditions: []metav1.Condition{rolloutCompleteTrue}},
 				},
 			},
+			wantProgressing: true,
 			want: metav1.Condition{
 				Type:   string(hyperv1.AutoNodeEnabled),
 				Status: metav1.ConditionFalse,
@@ -337,6 +340,7 @@ func TestReconcileAutoNodeEnabledCondition(t *testing.T) {
 					Status: hyperv1.ControlPlaneComponentStatus{Conditions: []metav1.Condition{rolloutCompleteFalse}},
 				},
 			},
+			wantProgressing: true,
 			want: metav1.Condition{
 				Type:   string(hyperv1.AutoNodeEnabled),
 				Status: metav1.ConditionFalse,
@@ -367,6 +371,7 @@ func TestReconcileAutoNodeEnabledCondition(t *testing.T) {
 				{ObjectMeta: metav1.ObjectMeta{Name: karpenterv2.ComponentName, Namespace: hcpNamespace}},
 				{ObjectMeta: metav1.ObjectMeta{Name: karpenteroperatorv2.ComponentName, Namespace: hcpNamespace}},
 			},
+			wantProgressing: true,
 			want: metav1.Condition{
 				Type:   string(hyperv1.AutoNodeEnabled),
 				Status: metav1.ConditionFalse,
@@ -378,6 +383,7 @@ func TestReconcileAutoNodeEnabledCondition(t *testing.T) {
 			deployments: []appsv1.Deployment{
 				{ObjectMeta: metav1.ObjectMeta{Name: karpenterv2.ComponentName, Namespace: hcpNamespace}},
 			},
+			wantProgressing: true,
 			want: metav1.Condition{
 				Type:   string(hyperv1.AutoNodeEnabled),
 				Status: metav1.ConditionFalse,
@@ -439,13 +445,16 @@ func TestReconcileAutoNodeEnabledCondition(t *testing.T) {
 				},
 			}
 
-			got := r.reconcileAutoNodeEnabledCondition(context.Background(), hcluster, hcpNamespace)
+			got, progressing := r.reconcileAutoNodeEnabledCondition(context.Background(), hcluster, hcpNamespace)
 			got.ObservedGeneration = 0
 			got.Message = ""
 			got.LastTransitionTime = metav1.Time{}
 
 			if !equality.Semantic.DeepEqual(tc.want, got) {
-				t.Errorf("expected %+v, got %+v", tc.want, got)
+				t.Errorf("condition: expected %+v, got %+v", tc.want, got)
+			}
+			if progressing != tc.wantProgressing {
+				t.Errorf("progressing: expected %v, got %v", tc.wantProgressing, progressing)
 			}
 		})
 	}
From c575c751a9cbb166feff35fd4b79dd73af562f0f Mon
Sep 17 00:00:00 2001 From: Max Cao Date: Tue, 12 May 2026 16:10:01 -0700 Subject: [PATCH 2/2] fix(e2e): deep-copy hostedCluster in parallel karpenter subtests The parallel provisioning subtests all captured the same *HostedCluster pointer. When two goroutines concurrently called mgtClient.Get() into that shared object, the JSON deserializer triggered a "concurrent map writes" panic. DeepCopy the pointer at the start of every parallel subtest so each goroutine works on its own object. Co-authored-by: Cursor Signed-off-by: Max Cao --- test/e2e/karpenter_test.go | 80 +++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/test/e2e/karpenter_test.go b/test/e2e/karpenter_test.go index f551d72c941..5fb7d6d3f3c 100644 --- a/test/e2e/karpenter_test.go +++ b/test/e2e/karpenter_test.go @@ -291,6 +291,8 @@ func testARM64Provisioning(ctx context.Context, guestClient crclient.Client, hos t.Parallel() g := NewWithT(t) + hc := hostedCluster.DeepCopy() + if !globalOpts.ConfigurableClusterOptions.AWSMultiArch && !globalOpts.ConfigurableClusterOptions.AzureMultiArch { t.Skip("test only supported on multi-arch clusters") } @@ -299,10 +301,10 @@ func testARM64Provisioning(ctx context.Context, guestClient crclient.Client, hos ObjectMeta: metav1.ObjectMeta{Name: "arm-nodeclass"}, Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ SubnetSelectorTerms: []hyperkarpenterv1.SubnetSelectorTerm{ - {Tags: map[string]string{"karpenter.sh/discovery": hostedCluster.Spec.InfraID}}, + {Tags: map[string]string{"karpenter.sh/discovery": hc.Spec.InfraID}}, }, SecurityGroupSelectorTerms: []hyperkarpenterv1.SecurityGroupSelectorTerm{ - {Tags: map[string]string{"karpenter.sh/discovery": hostedCluster.Spec.InfraID}}, + {Tags: map[string]string{"karpenter.sh/discovery": hc.Spec.InfraID}}, }, }, } @@ -333,7 +335,7 @@ func testARM64Provisioning(ctx context.Context, guestClient crclient.Client, hos "kubernetes.io/arch": "arm64", } - nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 1, armNodeLabels) + nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 1, armNodeLabels) waitForReadyKarpenterPods(t, ctx, guestClient, nodes, 1) g.Expect(guestClient.Delete(ctx, armNodePool)).To(Succeed()) @@ -342,7 +344,7 @@ func testARM64Provisioning(ctx context.Context, guestClient crclient.Client, hos t.Logf("Deleted ARM64 workloads") t.Logf("Waiting for Karpenter ARM64 Nodes to disappear") - _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 0, armNodeLabels) + _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 0, armNodeLabels) } } @@ -351,15 +353,15 @@ func testInstanceProfileAnnotation(ctx context.Context, mgtClient, guestClient c t.Parallel() g := NewWithT(t) - // Get the current HostedCluster - err := mgtClient.Get(ctx, crclient.ObjectKeyFromObject(hostedCluster), hostedCluster) + hc := hostedCluster.DeepCopy() + err := mgtClient.Get(ctx, crclient.ObjectKeyFromObject(hc), hc) g.Expect(err).NotTo(HaveOccurred()) // Use the default worker instance profile (typically {infraID}-worker) - workerInstanceProfile := hostedCluster.Spec.InfraID + "-worker" + workerInstanceProfile := hc.Spec.InfraID + "-worker" // Apply the annotation to the HostedCluster - err = e2eutil.UpdateObject(t, ctx, mgtClient, hostedCluster, func(obj *hyperv1.HostedCluster) { + err = e2eutil.UpdateObject(t, ctx, mgtClient, hc, func(obj *hyperv1.HostedCluster) { if obj.Annotations == 
nil { obj.Annotations = make(map[string]string) } @@ -401,7 +403,7 @@ func testInstanceProfileAnnotation(ctx context.Context, mgtClient, guestClient c karpenterv1.NodePoolLabelKey: testNodePool.Name, } - nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 1, testNodeLabels) + nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 1, testNodeLabels) t.Logf("Karpenter nodes are ready") // Verify EC2 instances have the correct instance profile @@ -427,13 +429,13 @@ func testInstanceProfileAnnotation(ctx context.Context, mgtClient, guestClient c t.Logf("Waiting for Karpenter nodes to be deleted") g.Expect(guestClient.Delete(ctx, testWorkLoads)).To(Succeed()) g.Expect(guestClient.Delete(ctx, testNodePool)).To(Succeed()) - _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 0, testNodeLabels) + _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 0, testNodeLabels) // Remove the annotation and verify it gets cleared from EC2NodeClass - err = mgtClient.Get(ctx, crclient.ObjectKeyFromObject(hostedCluster), hostedCluster) + err = mgtClient.Get(ctx, crclient.ObjectKeyFromObject(hc), hc) g.Expect(err).NotTo(HaveOccurred()) - err = e2eutil.UpdateObject(t, ctx, mgtClient, hostedCluster, func(obj *hyperv1.HostedCluster) { + err = e2eutil.UpdateObject(t, ctx, mgtClient, hc, func(obj *hyperv1.HostedCluster) { delete(obj.Annotations, hyperv1.AWSKarpenterDefaultInstanceProfile) }) g.Expect(err).NotTo(HaveOccurred()) @@ -456,13 +458,13 @@ func testNodeClassVersionField(ctx context.Context, mgtClient, guestClient crcli t.Parallel() g := NewWithT(t) - // Re-fetch the hosted cluster to get the latest version status - err := mgtClient.Get(ctx, crclient.ObjectKeyFromObject(hostedCluster), hostedCluster) + hc := hostedCluster.DeepCopy() + err := mgtClient.Get(ctx, crclient.ObjectKeyFromObject(hc), hc) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(hostedCluster.Status.Version).NotTo(BeNil(), "hostedCluster.Status.Version should not be nil") - g.Expect(hostedCluster.Status.Version.Desired.Version).NotTo(BeEmpty(), "hostedCluster.Status.Version.Desired.Version should not be empty") + g.Expect(hc.Status.Version).NotTo(BeNil(), "hostedCluster.Status.Version should not be nil") + g.Expect(hc.Status.Version.Desired.Version).NotTo(BeEmpty(), "hostedCluster.Status.Version.Desired.Version should not be empty") - cpVersion, err := semver.Parse(hostedCluster.Status.Version.Desired.Version) + cpVersion, err := semver.Parse(hc.Status.Version.Desired.Version) g.Expect(err).NotTo(HaveOccurred(), "failed to parse control plane version") t.Logf("Control plane version: %s", cpVersion.String()) @@ -489,8 +491,8 @@ func testNodeClassVersionField(ctx context.Context, mgtClient, guestClient crcli if nc.Status.ReleaseImage == "" { return false, "status.releaseImage is empty", nil } - if nc.Status.ReleaseImage != hostedCluster.Spec.Release.Image { - return false, fmt.Sprintf("expected status.releaseImage %q to match hostedCluster.Spec.Release.Image %q", nc.Status.ReleaseImage, hostedCluster.Spec.Release.Image), nil + if nc.Status.ReleaseImage != hc.Spec.Release.Image { + return false, fmt.Sprintf("expected status.releaseImage %q to match hostedCluster.Spec.Release.Image %q", nc.Status.ReleaseImage, hc.Spec.Release.Image), nil } return true, fmt.Sprintf("status.releaseImage matches control plane: %s", nc.Status.ReleaseImage), nil }), @@ -512,10 +514,10 @@ func testNodeClassVersionField(ctx 
context.Context, mgtClient, guestClient crcli Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ Version: nodeClassVersion, SubnetSelectorTerms: []hyperkarpenterv1.SubnetSelectorTerm{ - {Tags: map[string]string{"karpenter.sh/discovery": hostedCluster.Spec.InfraID}}, + {Tags: map[string]string{"karpenter.sh/discovery": hc.Spec.InfraID}}, }, SecurityGroupSelectorTerms: []hyperkarpenterv1.SecurityGroupSelectorTerm{ - {Tags: map[string]string{"karpenter.sh/discovery": hostedCluster.Spec.InfraID}}, + {Tags: map[string]string{"karpenter.sh/discovery": hc.Spec.InfraID}}, }, MetadataOptions: hyperkarpenterv1.MetadataOptions{ Access: hyperkarpenterv1.MetadataAccessHTTPEndpoint, @@ -629,7 +631,7 @@ func testNodeClassVersionField(ctx context.Context, mgtClient, guestClient crcli } // Log diagnostic info about the version-test NodeClass infrastructure. - hcpNamespace := manifests.HostedControlPlaneNamespace(hostedCluster.Namespace, hostedCluster.Name) + hcpNamespace := manifests.HostedControlPlaneNamespace(hc.Namespace, hc.Name) secretList := &corev1.SecretList{} if err := mgtClient.List(ctx, secretList, crclient.InNamespace(hcpNamespace), @@ -690,7 +692,7 @@ func testNodeClassVersionField(ctx context.Context, mgtClient, guestClient crcli // NodeClaims don't leak vCPUs into subsequent sequential tests. g.Expect(guestClient.Delete(ctx, testWorkLoads)).To(Succeed()) g.Expect(guestClient.Delete(ctx, testNodePool)).To(Succeed()) - _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 0, testNodeLabels) + _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 0, testNodeLabels) // Verify that a version exceeding the allowed n-3 skew sets SupportedVersionSkew=False. skewMajor, skewMinor, err := supportedversion.PreviousMinorVersion(cpVersion, 4) @@ -765,9 +767,11 @@ func testCapacityReservation(ctx context.Context, mgtClient, guestClient crclien t.Parallel() g := NewWithT(t) + hc := hostedCluster.DeepCopy() + // AutoNode.Provisioner.Karpenter.AWS is required at the API level when karpenter // is configured, so this should never happen/never be nil for a valid karpenter cluster. - if hostedCluster.Spec.AutoNode.Provisioner.Karpenter.Platform != hyperv1.AWSPlatform { + if hc.Spec.AutoNode.Provisioner.Karpenter.Platform != hyperv1.AWSPlatform { t.Skip("HostedCluster does not have a Karpenter AWS platform configured, skipping capacity reservation test") } @@ -890,7 +894,7 @@ func testCapacityReservation(ctx context.Context, mgtClient, guestClient crclien t.Logf("Created workload capacity-reservation-web-app to trigger node provisioning") // Wait for the node to be ready. - nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 1, crNodeLabels) + nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 1, crNodeLabels) g.Expect(nodes).To(HaveLen(1)) t.Logf("Node provisioned by capacity-reservation-test NodePool is ready") @@ -908,7 +912,7 @@ func testCapacityReservation(ctx context.Context, mgtClient, guestClient crclien // so stale NodeClaims don't leak vCPUs into subsequent sequential tests. 
g.Expect(guestClient.Delete(ctx, crWorkload)).To(Succeed()) g.Expect(guestClient.Delete(ctx, crNodePool)).To(Succeed()) - _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 0, crNodeLabels) + _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 0, crNodeLabels) } } @@ -917,13 +921,15 @@ func testArbitrarySubnet(ctx context.Context, mgtClient, guestClient crclient.Cl t.Parallel() g := NewWithT(t) + hc := hostedCluster.DeepCopy() + // Get VPC ID and find an AZ that is: // (a) supported by the VPC endpoint service (to avoid InvalidParameter), and // (b) not already occupied by a VPC subnet (to avoid DuplicateSubnetsInSameZone). // This exercises the real scenario: a customer brings a subnet in a new AZ, // it propagates to the VPC endpoint, and nodes in that AZ can reach the cluster. ec2client := ec2Client(awsCredsFile, awsRegion) - vpcID := hostedCluster.Spec.Platform.AWS.CloudProviderConfig.VPC + vpcID := hc.Spec.Platform.AWS.CloudProviderConfig.VPC subnetsOut, err := ec2client.DescribeSubnets(ctx, &ec2.DescribeSubnetsInput{ Filters: []ec2types.Filter{{Name: aws.String("vpc-id"), Values: []string{vpcID}}}, }) @@ -937,7 +943,7 @@ func testArbitrarySubnet(ctx context.Context, mgtClient, guestClient crclient.Cl } // Get the AZs supported by the VPC endpoint service. - hcpNamespace := manifests.HostedControlPlaneNamespace(hostedCluster.Namespace, hostedCluster.Name) + hcpNamespace := manifests.HostedControlPlaneNamespace(hc.Namespace, hc.Name) esList := &hyperv1.AWSEndpointServiceList{} g.Expect(mgtClient.List(ctx, esList, crclient.InNamespace(hcpNamespace))).To(Succeed()) g.Expect(esList.Items).NotTo(BeEmpty(), "expected at least one AWSEndpointService") @@ -973,7 +979,7 @@ func testArbitrarySubnet(ctx context.Context, mgtClient, guestClient crclient.Cl t.Logf("Selected AZ %s for test subnet (supported by endpoint service, not in VPC)", az) // Create a small test subnet in the VPC. - subnetID, cleanupSubnet := e2eutil.CreateTestSubnet(ctx, t, ec2client, vpcID, az, hostedCluster.Spec.InfraID) + subnetID, cleanupSubnet := e2eutil.CreateTestSubnet(ctx, t, ec2client, vpcID, az, hc.Spec.InfraID) t.Logf("Created test subnet %s in AZ %s", subnetID, az) // Create an OpenshiftEC2NodeClass that selects the subnet by ID. @@ -982,7 +988,7 @@ func testArbitrarySubnet(ctx context.Context, mgtClient, guestClient crclient.Cl Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ SubnetSelectorTerms: []hyperkarpenterv1.SubnetSelectorTerm{{ID: subnetID}}, SecurityGroupSelectorTerms: []hyperkarpenterv1.SecurityGroupSelectorTerm{ - {Tags: map[string]string{"karpenter.sh/discovery": hostedCluster.Spec.InfraID}}, + {Tags: map[string]string{"karpenter.sh/discovery": hc.Spec.InfraID}}, }, }, } @@ -994,7 +1000,7 @@ func testArbitrarySubnet(ctx context.Context, mgtClient, guestClient crclient.Cl } // Wait for the subnet to be removed from the karpenter-subnets ConfigMap. // The karpenter-operator removes it during NodeClass deletion reconciliation. 
- hcpNS := manifests.HostedControlPlaneNamespace(hostedCluster.Namespace, hostedCluster.Name) + hcpNS := manifests.HostedControlPlaneNamespace(hc.Namespace, hc.Name) if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { cm := &corev1.ConfigMap{} if err := mgtClient.Get(ctx, crclient.ObjectKey{ @@ -1158,7 +1164,7 @@ func testArbitrarySubnet(ctx context.Context, mgtClient, guestClient crclient.Cl testNodeLabels := map[string]string{ karpenterv1.NodePoolLabelKey: testNodePool.Name, } - nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 1, testNodeLabels) + nodes := e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 1, testNodeLabels) t.Logf("Node launched in arbitrary subnet, verifying it used subnet %s", subnetID) // Verify the launched node's EC2 instance is in the expected subnet. @@ -1181,7 +1187,9 @@ func testKubeletPropagation(ctx context.Context, mgtClient, guestClient crclient t.Parallel() g := NewWithT(t) - hcpNamespace := manifests.HostedControlPlaneNamespace(hostedCluster.Namespace, hostedCluster.Name) + hc := hostedCluster.DeepCopy() + + hcpNamespace := manifests.HostedControlPlaneNamespace(hc.Namespace, hc.Name) // Create a custom OpenshiftEC2NodeClass that the controller does not manage, so that // reconcileOpenshiftEC2NodeClassDefault cannot overwrite spec.kubelet on every reconcile. @@ -1316,11 +1324,11 @@ func testKubeletPropagation(ctx context.Context, mgtClient, guestClient crclient } // Wait for nodes to be provisioned - e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 1, testNodeLabels) + e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 1, testNodeLabels) t.Logf("Karpenter nodes are ready") // Build a clientset for the guest cluster (needed for pod log fetching) - guestConfig := e2eutil.WaitForGuestRestConfig(t, ctx, mgtClient, hostedCluster) + guestConfig := e2eutil.WaitForGuestRestConfig(t, ctx, mgtClient, hc) guestClientset, err := kubeclient.NewForConfig(guestConfig) g.Expect(err).NotTo(HaveOccurred()) @@ -1359,7 +1367,7 @@ func testKubeletPropagation(ctx context.Context, mgtClient, guestClient crclient // Cleanup workloads and NodePool g.Expect(guestClient.Delete(ctx, testWorkLoads)).To(Succeed()) g.Expect(guestClient.Delete(ctx, testNodePool)).To(Succeed()) - _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 0, testNodeLabels) + _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hc.Spec.Platform.Type, 0, testNodeLabels) } }
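---
Illustrative sketch, not part of either patch: the race fixed in PATCH 2/2 occurs whenever parallel subtests capture one shared pointer and the client decodes concurrently into its map fields. The self-contained Go program below shows why a per-goroutine DeepCopy removes the race; the Cluster type and its DeepCopy method here are assumed stand-ins for the generated hyperv1.HostedCluster API type, not the real HyperShift code.

package main

import (
	"fmt"
	"sync"
)

// Cluster stands in for a shared API object such as *hyperv1.HostedCluster.
// Labels plays the role of the map fields the JSON decoder rewrites on every
// Get call, which is what produced the "concurrent map writes" panic.
type Cluster struct {
	Name   string
	Labels map[string]string
}

// DeepCopy mirrors what the generated DeepCopy of a Kubernetes API type does:
// it allocates a fresh map, so concurrent writers never share memory.
func (c *Cluster) DeepCopy() *Cluster {
	out := &Cluster{Name: c.Name, Labels: make(map[string]string, len(c.Labels))}
	for k, v := range c.Labels {
		out.Labels[k] = v
	}
	return out
}

func main() {
	shared := &Cluster{Name: "example", Labels: map[string]string{}}

	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// Each goroutine works on its own copy, the same way each parallel
			// subtest now starts with hc := hostedCluster.DeepCopy().
			// Writing into shared.Labels here instead would be a data race.
			c := shared.DeepCopy()
			c.Labels["worker"] = fmt.Sprint(i)
		}(i)
	}
	wg.Wait()
	fmt.Println("done, no concurrent writes to", shared.Name)
}

Running the unsafe variant (writing into shared.Labels directly) under go run -race reports the same class of data race that the e2e subtests hit before the DeepCopy was added.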