From 0514a1ef39ca5870d19fd85d9fcbaf3b6cb94a68 Mon Sep 17 00:00:00 2001 From: Vimal Solanki Date: Wed, 6 May 2026 19:37:20 +0530 Subject: [PATCH] fix: use NodePort for HCP router Service on non-cloud platforms On non-cloud platforms (Agent, KubeVirt, OpenStack, None), the HCP router Service was unconditionally created as LoadBalancer. Without a cloud LB controller, the Service stays pending forever, blocking InfrastructureReady. - ReconcileRouterService: use NodePort for non-cloud platforms - reconcileRouterServiceStatus: guard against NodePort before calling CollectLBMessageIfNotProvisioned - kas.ReconcileServiceStatus: add NodePort guard in Route case Signed-off-by: Vimal Solanki Closes: OCPBUGS-77856 --- .../hostedcontrolplane/infra/infra.go | 11 ++- .../hostedcontrolplane/infra/infra_test.go | 35 +++++++++ .../hostedcontrolplane/ingress/router.go | 11 ++- .../hostedcontrolplane/ingress/router_test.go | 74 +++++++++++++++++++ .../hostedcontrolplane/kas/service.go | 13 +++- .../hostedcontrolplane/kas/service_test.go | 63 ++++++++++++++++ 6 files changed, 203 insertions(+), 4 deletions(-) diff --git a/control-plane-operator/controllers/hostedcontrolplane/infra/infra.go b/control-plane-operator/controllers/hostedcontrolplane/infra/infra.go index 6f5f85a5190..b933abfe97d 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/infra/infra.go +++ b/control-plane-operator/controllers/hostedcontrolplane/infra/infra.go @@ -698,7 +698,8 @@ func (r *Reconciler) reconcileExternalRouterServiceStatus(ctx context.Context, h return r.reconcileRouterServiceStatus(ctx, manifests.RouterPublicService(hcp.Namespace), events.NewMessageCollector(ctx, r.Client)) } -func (r *Reconciler) reconcileRouterServiceStatus(ctx context.Context, svc *corev1.Service, messageCollector events.MessageCollector) (host string, needed bool, message string, err error) { +func (r *Reconciler) reconcileRouterServiceStatus(ctx context.Context, svc *corev1.Service, messageCollector events.MessageCollector) (host string, + needed bool, message string, err error) { needed = true if err = r.Client.Get(ctx, client.ObjectKeyFromObject(svc), svc); err != nil { if apierrors.IsNotFound(err) { @@ -708,6 +709,14 @@ func (r *Reconciler) reconcileRouterServiceStatus(ctx context.Context, svc *core err = fmt.Errorf("failed to get router service (%s): %w", svc.Name, err) return } + + if svc.Spec.Type == corev1.ServiceTypeNodePort { + if len(svc.Spec.Ports) > 0 && svc.Spec.Ports[0].NodePort != 0 { + host = svc.Spec.ClusterIP + } + return + } + if message, err = k8sutil.CollectLBMessageIfNotProvisioned(svc, messageCollector); err != nil || message != "" { return } diff --git a/control-plane-operator/controllers/hostedcontrolplane/infra/infra_test.go b/control-plane-operator/controllers/hostedcontrolplane/infra/infra_test.go index f344b921cc4..21d52d49716 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/infra/infra_test.go +++ b/control-plane-operator/controllers/hostedcontrolplane/infra/infra_test.go @@ -1738,6 +1738,41 @@ func TestReconcileRouterServiceStatus(t *testing.T) { }, expectedHost: "1.2.3.4", }, + { + name: "When NodePort service with assigned port it should return ClusterIP as host", + svc: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: svcName, Namespace: namespace}, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeNodePort, + ClusterIP: "10.0.0.100", + Ports: []corev1.ServicePort{ + { + Name: "https", + Port: 443, + NodePort: 31443, + }, + }, + }, + }, + expectedHost: "10.0.0.100", + }, + { + name: "When NodePort service without assigned port it should return empty host", + svc: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: svcName, Namespace: namespace}, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeNodePort, + ClusterIP: "10.0.0.100", + Ports: []corev1.ServicePort{ + { + Name: "https", + Port: 443, + }, + }, + }, + }, + expectedHost: "", + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { diff --git a/control-plane-operator/controllers/hostedcontrolplane/ingress/router.go b/control-plane-operator/controllers/hostedcontrolplane/ingress/router.go index cc0e845217e..328e72dc8c8 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/ingress/router.go +++ b/control-plane-operator/controllers/hostedcontrolplane/ingress/router.go @@ -64,7 +64,14 @@ func ReconcileRouterService(svc *corev1.Service, internal, crossZoneLoadBalancin for k, v := range hcpRouterLabels() { svc.Labels[k] = v } - svc.Spec.Type = corev1.ServiceTypeLoadBalancer + if svc.Spec.Type == "" { + switch hcp.Spec.Platform.Type { + case hyperv1.AgentPlatform, hyperv1.KubevirtPlatform, hyperv1.NonePlatform: + svc.Spec.Type = corev1.ServiceTypeNodePort + default: + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + } + } svc.Spec.Selector = hcpRouterLabels() foundHTTPS := false @@ -89,7 +96,7 @@ func ReconcileRouterService(svc *corev1.Service, internal, crossZoneLoadBalancin // Apply LoadBalancerSourceRanges for external router services to restrict CIDR access // Only apply for external (non-internal) services and when not running on ARO HCP allowedCIDRBlocks := netutil.AllowedCIDRBlocks(hcp) - if !internal && !azureutil.IsAroHCP() { + if !internal && !azureutil.IsAroHCP() && svc.Spec.Type == corev1.ServiceTypeLoadBalancer { svc.Spec.LoadBalancerSourceRanges = allowedCIDRBlocks } diff --git a/control-plane-operator/controllers/hostedcontrolplane/ingress/router_test.go b/control-plane-operator/controllers/hostedcontrolplane/ingress/router_test.go index 81dfec25637..3160023481b 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/ingress/router_test.go +++ b/control-plane-operator/controllers/hostedcontrolplane/ingress/router_test.go @@ -1,6 +1,7 @@ package ingress import ( + "fmt" "testing" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" @@ -178,3 +179,76 @@ func TestReconcileRouterService_GCPInternalLoadBalancer(t *testing.T) { t.Fatalf("expected service selector app to be 'private-router', got %q", svc.Spec.Selector["app"]) } } + +// Test that non-cloud platforms use NodePort +func TestReconcileRouterService_NonCloudPlatformsUseNodePort(t *testing.T) { + platforms := []hyperv1.PlatformType{ + hyperv1.AgentPlatform, + hyperv1.KubevirtPlatform, + hyperv1.NonePlatform, + } + + for _, platform := range platforms { + t.Run(fmt.Sprintf("When platform is %s it should use NodePort", platform), func(t *testing.T) { + hcp := &hyperv1.HostedControlPlane{} + hcp.Spec.Platform.Type = platform + + svc := &corev1.Service{} + + if err := ReconcileRouterService(svc, false, false, hcp); err != nil { + t.Fatalf("ReconcileRouterService returned error: %v", err) + } + + if svc.Spec.Type != corev1.ServiceTypeNodePort { + t.Fatalf("expected service type to be NodePort for %s platform, got %v", platform, svc.Spec.Type) + } + }) + } +} + +// Test that existing service type is preserved on reconcile +func TestReconcileRouterService_ExistingServicePreservesType(t *testing.T) { + t.Run("When existing service is LoadBalancer on non-cloud platform it should not change type", func(t *testing.T) { + hcp := &hyperv1.HostedControlPlane{} + hcp.Spec.Platform.Type = hyperv1.AgentPlatform + + svc := &corev1.Service{} + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + + if err := ReconcileRouterService(svc, false, false, hcp); err != nil { + t.Fatalf("ReconcileRouterService returned error: %v", err) + } + + if svc.Spec.Type != corev1.ServiceTypeLoadBalancer { + t.Fatalf("expected existing LoadBalancer type to be preserved, got %v", svc.Spec.Type) + } + }) +} + +// Test that cloud platforms use LoadBalancer +func TestReconcileRouterService_CloudPlatformsUseLoadBalancer(t *testing.T) { + platforms := []hyperv1.PlatformType{ + hyperv1.AWSPlatform, + hyperv1.GCPPlatform, + hyperv1.AzurePlatform, + hyperv1.PowerVSPlatform, + hyperv1.OpenStackPlatform, + } + + for _, platform := range platforms { + t.Run(fmt.Sprintf("When platform is %s it should use LoadBalancer", platform), func(t *testing.T) { + hcp := &hyperv1.HostedControlPlane{} + hcp.Spec.Platform.Type = platform + + svc := &corev1.Service{} + + if err := ReconcileRouterService(svc, false, false, hcp); err != nil { + t.Fatalf("ReconcileRouterService returned error: %v", err) + } + + if svc.Spec.Type != corev1.ServiceTypeLoadBalancer { + t.Fatalf("expected service type to be LoadBalancer for %s platform, got %v", platform, svc.Spec.Type) + } + }) + } +} diff --git a/control-plane-operator/controllers/hostedcontrolplane/kas/service.go b/control-plane-operator/controllers/hostedcontrolplane/kas/service.go index 1c69a7d4697..0f3ee869b07 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/kas/service.go +++ b/control-plane-operator/controllers/hostedcontrolplane/kas/service.go @@ -174,10 +174,21 @@ func ReconcileServiceStatus(svc *corev1.Service, strategy *hyperv1.ServicePublis port = svc.Spec.Ports[0].NodePort host = strategy.NodePort.Address case hyperv1.Route: + if svc.Spec.Type == corev1.ServiceTypeNodePort { + if len(svc.Spec.Ports) > 0 && svc.Spec.Ports[0].NodePort != 0 { + if strategy.Route != nil { + host = strategy.Route.Hostname + port = 443 + } + } + return + } if message, err := k8sutil.CollectLBMessageIfNotProvisioned(svc, messageCollector); err != nil || message != "" { return host, port, message, err } - host = strategy.Route.Hostname + if strategy.Route != nil { + host = strategy.Route.Hostname + } port = 443 } return diff --git a/control-plane-operator/controllers/hostedcontrolplane/kas/service_test.go b/control-plane-operator/controllers/hostedcontrolplane/kas/service_test.go index 67dfe1e8d4b..ae2ebdcd771 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/kas/service_test.go +++ b/control-plane-operator/controllers/hostedcontrolplane/kas/service_test.go @@ -13,6 +13,8 @@ import ( corev1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + + "sigs.k8s.io/controller-runtime/pkg/client" ) func TestReconcileService(t *testing.T) { @@ -438,3 +440,64 @@ func TestKonnectivityServiceReconcile(t *testing.T) { }) } } +func TestReconcileServiceStatus(t *testing.T) { + tests := []struct { + name string + svc *corev1.Service + strategy *hyperv1.ServicePublishingStrategy + expectedHost string + expectedPort int32 + }{ + { + name: "When Route strategy with NodePort service it should return route hostname", + svc: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeNodePort, + Ports: []corev1.ServicePort{ + {Port: 443, NodePort: 31443}, + }, + }, + }, + strategy: &hyperv1.ServicePublishingStrategy{ + Type: hyperv1.Route, + Route: &hyperv1.RoutePublishingStrategy{Hostname: "api.example.com"}, + }, + expectedHost: "api.example.com", + expectedPort: 443, + }, + { + name: "When Route strategy with NodePort service without assigned port it should return empty host", + svc: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeNodePort, + Ports: []corev1.ServicePort{ + {Port: 443}, + }, + }, + }, + strategy: &hyperv1.ServicePublishingStrategy{ + Type: hyperv1.Route, + Route: &hyperv1.RoutePublishingStrategy{Hostname: "api.example.com"}, + }, + expectedHost: "", + expectedPort: 0, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + msgCollector := &fakeMessageCollector{} + host, port, msg, err := ReconcileServiceStatus(tc.svc, tc.strategy, 6443, msgCollector) + g.Expect(err).To(BeNil()) + g.Expect(msg).To(BeEmpty()) + g.Expect(host).To(Equal(tc.expectedHost)) + g.Expect(port).To(Equal(tc.expectedPort)) + }) + } +} + +type fakeMessageCollector struct{} + +func (c *fakeMessageCollector) ErrorMessages(_ client.Object) ([]string, error) { + return nil, nil +}