From d4e7140655fbedd3065bc303ac89b4f4dcfb9091 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Fri, 8 May 2026 15:26:50 -0400 Subject: [PATCH] fix(e2e): fix AllowedCIDRs test for Route-based KAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ValidateKubeAPIServerAllowedCIDRs test fails on v2 Azure self-managed clusters because KAS uses Route publishing strategy (via external-dns-domain), not LoadBalancer. Two fixes: 1. Wait for the downstream LB service (router or KAS LB) to have its LoadBalancerSourceRanges updated by the CPO before asserting KAS reachability. The target service is determined by the HC's APIServer publishing strategy. 2. Create a fresh kubeclient per poll iteration to prevent HTTP/2 connection reuse. Go's HTTP/2 transport multiplexes all requests over a single persistent TCP connection — if a prior request succeeded before Azure NSG rules took effect, subsequent requests bypass the restriction on the same connection. Co-Authored-By: Claude Opus 4.6 --- test/e2e/create_cluster_test.go | 2 +- test/e2e/util/util.go | 85 +++++++++++++++++++--- test/e2e/util/util_test.go | 120 ++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+), 10 deletions(-) diff --git a/test/e2e/create_cluster_test.go b/test/e2e/create_cluster_test.go index 98eaaf3d894..a1c6dec5232 100644 --- a/test/e2e/create_cluster_test.go +++ b/test/e2e/create_cluster_test.go @@ -114,8 +114,8 @@ func TestCreateCluster(t *testing.T) { e2eutil.EnsureDefaultSecurityGroupTags(t, ctx, mgtClient, hostedCluster, clusterOpts) if globalOpts.Platform == hyperv1.AzurePlatform { - e2eutil.EnsureKubeAPIServerAllowedCIDRs(t, ctx, mgtClient, guestConfig, hostedCluster) e2eutil.EnsureAzureWorkloadIdentityWebhookMutation(t, ctx, guestClient) + e2eutil.EnsureKubeAPIServerAllowedCIDRs(t, ctx, mgtClient, guestConfig, hostedCluster) } e2eutil.EnsureGlobalPullSecret(t, ctx, mgtClient, hostedCluster, globalOpts.AdditionalPullSecretFile) diff --git a/test/e2e/util/util.go b/test/e2e/util/util.go index 76125493092..37254ef5139 100644 --- a/test/e2e/util/util.go +++ b/test/e2e/util/util.go @@ -3503,17 +3503,23 @@ func ValidateKubeAPIServerAllowedCIDRs(t testing.TB, ctx context.Context, mgmtCl } }) g.Expect(err).NotTo(HaveOccurred(), "failed to restore HostedCluster API server CIDRs") - }() - kubeClient, err := kubeclient.NewForConfig(guestConfig) - g.Expect(err).NotTo(HaveOccurred()) + // Verify KAS is reachable on the original transport before returning. The + // AllowedCIDRs test uses cfg.Dial to create isolated transports, but subsequent + // tests share the original guestConfig's transport. Without this wait, the next + // test may start before Azure LB propagation completes the CIDR restoration. + g.Eventually(func(g Gomega) { + client, err := kubeclient.NewForConfig(guestConfig) + g.Expect(err).ToNot(HaveOccurred(), "failed to create kubeclient for transport recovery") + _, err = client.ServerVersion() + g.Expect(err).ToNot(HaveOccurred(), "KAS should be reachable on original transport after CIDR cleanup") + }).WithContext(ctx).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + }() // ensure that kube-apiserver is not reachable from anywhere - ensureAPIServerAllowedCIDRs(ctx, t, g, mgmtClient, kubeClient, hc, []string{"0.0.0.0/32"}, false) + ensureAPIServerAllowedCIDRs(ctx, t, g, mgmtClient, guestConfig, hc, []string{"0.0.0.0/32"}, false) // ensure kube-apiserver is reachable when allowed CIDRs allow access from everywhere - // This is useful for testing purposes, as it allows us to access the kube-apiserver from any IP - // In a production environment, this should be restricted to specific CIDRs - ensureAPIServerAllowedCIDRs(ctx, t, g, mgmtClient, kubeClient, hc, append([]string{"0.0.0.0/0"}, generateTestCIDRs250()...), true) + ensureAPIServerAllowedCIDRs(ctx, t, g, mgmtClient, guestConfig, hc, append([]string{"0.0.0.0/0"}, generateTestCIDRs250()...), true) } func EnsureKubeAPIServerAllowedCIDRs(t *testing.T, ctx context.Context, mgmtClient crclient.Client, guestConfig *rest.Config, hc *hyperv1.HostedCluster) { @@ -3522,7 +3528,7 @@ func EnsureKubeAPIServerAllowedCIDRs(t *testing.T, ctx context.Context, mgmtClie }) } -func ensureAPIServerAllowedCIDRs(ctx context.Context, t testing.TB, g Gomega, mgmtClient crclient.Client, guestClient *kubeclient.Clientset, hc *hyperv1.HostedCluster, allowedCIDRs []string, shouldBeReachable bool) { +func ensureAPIServerAllowedCIDRs(ctx context.Context, t testing.TB, g Gomega, mgmtClient crclient.Client, guestConfig *rest.Config, hc *hyperv1.HostedCluster, allowedCIDRs []string, shouldBeReachable bool) { expectedCIDRs := make([]hyperv1.CIDRBlock, len(allowedCIDRs)) for i, cidr := range allowedCIDRs { expectedCIDRs[i] = hyperv1.CIDRBlock(cidr) @@ -3561,8 +3567,40 @@ func ensureAPIServerAllowedCIDRs(ctx context.Context, t testing.TB, g Gomega, mg "HCP AllowedCIDRBlocks should match the HostedCluster spec") }).WithContext(ctx).WithTimeout(time.Minute * 3).WithPolling(time.Second * 5).Should(Succeed()) + // Wait for the CPO to reconcile the downstream service with the expected LoadBalancerSourceRanges. + // The target service depends on the APIServer publishing strategy: + // - Route: the "router" LB service carries the CIDRs + // - LoadBalancer: the KAS LB service itself carries the CIDRs + targetSvc := allowedCIDRsTargetService(hc, hcpNamespace) + if targetSvc != nil { + expectedSourceRanges := slices.Clone(allowedCIDRs) + slices.Sort(expectedSourceRanges) + t.Logf("Waiting for service %s/%s LoadBalancerSourceRanges to match %d CIDRs", targetSvc.Namespace, targetSvc.Name, len(expectedSourceRanges)) + g.Eventually(func(g Gomega) { + svc := &corev1.Service{} + err := mgmtClient.Get(ctx, crclient.ObjectKeyFromObject(targetSvc), svc) + g.Expect(err).ToNot(HaveOccurred(), "failed to get service %s/%s", targetSvc.Namespace, targetSvc.Name) + actualSourceRanges := slices.Clone(svc.Spec.LoadBalancerSourceRanges) + slices.Sort(actualSourceRanges) + g.Expect(actualSourceRanges).To(Equal(expectedSourceRanges), + "service %s/%s LoadBalancerSourceRanges should match expected CIDRs", targetSvc.Namespace, targetSvc.Name) + }).WithContext(ctx).WithTimeout(time.Minute * 3).WithPolling(time.Second * 5).Should(Succeed()) + } else { + t.Log("No downstream LB service identified for this cluster configuration; skipping LoadBalancerSourceRanges wait") + } + + // Create a fresh kubeclient per poll to avoid HTTP/2 connection reuse. Go's HTTP/2 + // transport multiplexes requests over a single persistent TCP connection. If a prior + // successful request established a connection before load balancer source-range + // restrictions took effect, all subsequent requests reuse that connection and bypass + // the restriction. Setting cfg.Dial ensures a unique TLS transport cache key per + // iteration, forcing a new TCP connection. g.Eventually(func(g Gomega) { - _, err = guestClient.ServerVersion() + cfg := rest.CopyConfig(guestConfig) + cfg.Dial = (&net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second}).DialContext + freshClient, err := kubeclient.NewForConfig(cfg) + g.Expect(err).ToNot(HaveOccurred(), "failed to create kubeclient") + _, err = freshClient.ServerVersion() if shouldBeReachable { g.Expect(err).ToNot(HaveOccurred(), "kube-apiserver should be reachable") } else { @@ -3571,6 +3609,35 @@ func ensureAPIServerAllowedCIDRs(ctx context.Context, t testing.TB, g Gomega, mg }).WithContext(ctx).WithTimeout(time.Minute * 3).WithPolling(time.Second * 5).Should(Succeed()) } +// allowedCIDRsTargetService returns the LoadBalancer service that enforces AllowedCIDRBlocks +// based on the HostedCluster's APIServer publishing strategy. Returns nil when no LB service +// carries source ranges (private clusters, NodePort, ARO HCP). +// Mirrors CPO's API server and router service reconciliation logic. +func allowedCIDRsTargetService(hc *hyperv1.HostedCluster, hcpNamespace string) *corev1.Service { + if !netutil.IsPublicHC(hc) { + return nil + } + strategy := netutil.ServicePublishingStrategyByTypeByHC(hc, hyperv1.APIServer) + if strategy == nil { + return nil + } + switch strategy.Type { + case hyperv1.Route: + if azureutil.IsAroHCP() { + return nil + } + return cpomanifests.RouterPublicService(hcpNamespace) + case hyperv1.LoadBalancer: + if hc.Spec.Platform.Type == hyperv1.AzurePlatform || + (hc.Annotations != nil && hc.Annotations[hyperv1.ManagementPlatformAnnotation] == string(hyperv1.AzurePlatform)) { + return cpomanifests.KubeAPIServerServiceAzureLB(hcpNamespace) + } + return cpomanifests.KubeAPIServerService(hcpNamespace) + default: + return nil + } +} + // generateTestCIDRs250 is a helper to generate 250 /32 CIDRs starting at 250.250.250.1 func generateTestCIDRs250() []string { cidrs := make([]string, 0, 250) diff --git a/test/e2e/util/util_test.go b/test/e2e/util/util_test.go index 1c5761ae3ec..53ad960232f 100644 --- a/test/e2e/util/util_test.go +++ b/test/e2e/util/util_test.go @@ -7,9 +7,129 @@ import ( . "github.com/onsi/gomega" + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/azureutil" "github.com/openshift/hypershift/support/certs" + + "k8s.io/utils/ptr" ) +func TestAllowedCIDRsTargetService(t *testing.T) { + const ns = "test-hcp" + + publicHC := func(platform hyperv1.PlatformType, svcType hyperv1.PublishingStrategyType) *hyperv1.HostedCluster { + hc := &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{Type: platform}, + Services: []hyperv1.ServicePublishingStrategyMapping{{ + Service: hyperv1.APIServer, + ServicePublishingStrategy: hyperv1.ServicePublishingStrategy{Type: svcType}, + }}, + }, + } + switch platform { + case hyperv1.AWSPlatform: + hc.Spec.Platform.AWS = ptr.To(hyperv1.AWSPlatformSpec{EndpointAccess: hyperv1.Public}) + case hyperv1.AzurePlatform: + hc.Spec.Platform.Azure = ptr.To(hyperv1.AzurePlatformSpec{Topology: hyperv1.AzureTopologyPublic}) + } + return hc + } + + tests := []struct { + name string + hc *hyperv1.HostedCluster + aroHCP bool + wantName string + wantNil bool + }{ + { + name: "When Route strategy on AWS it should return the router service", + hc: publicHC(hyperv1.AWSPlatform, hyperv1.Route), + wantName: "router", + }, + { + name: "When Route strategy on Azure self-managed it should return the router service", + hc: publicHC(hyperv1.AzurePlatform, hyperv1.Route), + wantName: "router", + }, + { + name: "When Route strategy on ARO HCP it should return nil", + hc: publicHC(hyperv1.AzurePlatform, hyperv1.Route), + aroHCP: true, + wantNil: true, + }, + { + name: "When LoadBalancer strategy on Azure it should return the Azure LB service", + hc: publicHC(hyperv1.AzurePlatform, hyperv1.LoadBalancer), + wantName: "kube-apiserverlb", + }, + { + name: "When LoadBalancer strategy with Azure management annotation it should return the Azure LB service", + hc: func() *hyperv1.HostedCluster { + hc := publicHC(hyperv1.NonePlatform, hyperv1.LoadBalancer) + hc.Annotations = map[string]string{ + hyperv1.ManagementPlatformAnnotation: string(hyperv1.AzurePlatform), + } + return hc + }(), + wantName: "kube-apiserverlb", + }, + { + name: "When LoadBalancer strategy on AWS it should return the KAS service", + hc: publicHC(hyperv1.AWSPlatform, hyperv1.LoadBalancer), + wantName: "kube-apiserver", + }, + { + name: "When private Azure cluster it should return nil", + hc: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AzurePlatform, + Azure: ptr.To(hyperv1.AzurePlatformSpec{Topology: hyperv1.AzureTopologyPrivate}), + }, + Services: []hyperv1.ServicePublishingStrategyMapping{{ + Service: hyperv1.APIServer, + ServicePublishingStrategy: hyperv1.ServicePublishingStrategy{Type: hyperv1.Route}, + }}, + }, + }, + wantNil: true, + }, + { + name: "When NodePort strategy it should return nil", + hc: publicHC(hyperv1.AWSPlatform, hyperv1.NodePort), + wantNil: true, + }, + { + name: "When no APIServer strategy it should return nil", + hc: func() *hyperv1.HostedCluster { + hc := publicHC(hyperv1.AWSPlatform, hyperv1.Route) + hc.Spec.Services = nil + return hc + }(), + wantNil: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + if tc.aroHCP { + azureutil.SetAsAroHCPTest(t) + } + svc := allowedCIDRsTargetService(tc.hc, ns) + if tc.wantNil { + g.Expect(svc).To(BeNil()) + } else { + g.Expect(svc).ToNot(BeNil()) + g.Expect(svc.Name).To(Equal(tc.wantName)) + g.Expect(svc.Namespace).To(Equal(ns)) + } + }) + } +} + // TestGenerateCustomCertificate verifies that our certificate generation works correctly func TestGenerateCustomCertificate(t *testing.T) { testsCases := []struct {