diff --git a/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.capabilities.testsuite.yaml b/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.capabilities.testsuite.yaml index e6b2466713d..833dcc907eb 100644 --- a/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.capabilities.testsuite.yaml +++ b/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.capabilities.testsuite.yaml @@ -535,3 +535,81 @@ tests: servicePublishingStrategy: type: Route route: {} + + onUpdate: + - name: When capabilities disabled list is changed it should fail + initial: | + apiVersion: hypershift.openshift.io/v1beta1 + kind: HostedCluster + spec: + capabilities: + disabled: + - ImageRegistry + dns: + baseDomain: example.com + platform: + type: AWS + pullSecret: + name: secret + release: + image: quay.io/openshift-release-dev/ocp-release:4.15.11-x86_64 + secretEncryption: + aescbc: + activeKey: + name: key + type: aescbc + services: + - service: APIServer + servicePublishingStrategy: + type: Route + route: {} + - service: OAuthServer + servicePublishingStrategy: + type: Route + route: {} + - service: Konnectivity + servicePublishingStrategy: + type: Route + route: {} + - service: Ignition + servicePublishingStrategy: + type: Route + route: {} + updated: | + apiVersion: hypershift.openshift.io/v1beta1 + kind: HostedCluster + spec: + capabilities: + disabled: + - Insights + dns: + baseDomain: example.com + platform: + type: AWS + pullSecret: + name: secret + release: + image: quay.io/openshift-release-dev/ocp-release:4.15.11-x86_64 + secretEncryption: + aescbc: + activeKey: + name: key + type: aescbc + services: + - service: APIServer + servicePublishingStrategy: + type: Route + route: {} + - service: OAuthServer + servicePublishingStrategy: + type: Route + 
route: {} + - service: Konnectivity + servicePublishingStrategy: + type: Route + route: {} + - service: Ignition + servicePublishingStrategy: + type: Route + route: {} + expectedError: "Capabilities is immutable" diff --git a/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.services.testsuite.yaml b/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.services.testsuite.yaml index 1b9d301bb4d..ed4bacd9ce9 100644 --- a/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.services.testsuite.yaml +++ b/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.services.testsuite.yaml @@ -246,3 +246,76 @@ tests: - anything - kas.duplicated.hostname.com expectedError: "loadBalancer hostname cannot be in ClusterConfiguration.apiserver.servingCerts.namedCertificates" + + onUpdate: + - name: When services publishing strategy type is changed it should fail + initial: | + apiVersion: hypershift.openshift.io/v1beta1 + kind: HostedCluster + spec: + dns: + baseDomain: example.com + platform: + type: AWS + pullSecret: + name: secret + release: + image: quay.io/openshift-release-dev/ocp-release:4.15.11-x86_64 + secretEncryption: + aescbc: + activeKey: + name: key + type: aescbc + services: + - service: APIServer + servicePublishingStrategy: + type: Route + route: {} + - service: OAuthServer + servicePublishingStrategy: + type: Route + route: {} + - service: Konnectivity + servicePublishingStrategy: + type: Route + route: {} + - service: Ignition + servicePublishingStrategy: + type: Route + route: {} + updated: | + apiVersion: hypershift.openshift.io/v1beta1 + kind: HostedCluster + spec: + dns: + baseDomain: example.com + platform: + type: AWS + pullSecret: + name: secret + release: + image: quay.io/openshift-release-dev/ocp-release:4.15.11-x86_64 + 
secretEncryption: + aescbc: + activeKey: + name: key + type: aescbc + services: + - service: APIServer + servicePublishingStrategy: + type: NodePort + nodePort: + address: "127.0.0.1" + - service: OAuthServer + servicePublishingStrategy: + type: Route + route: {} + - service: Konnectivity + servicePublishingStrategy: + type: Route + route: {} + - service: Ignition + servicePublishingStrategy: + type: Route + route: {} + expectedError: "Services is immutable" diff --git a/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.validation.testsuite.yaml b/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.validation.testsuite.yaml index 4c5a26f035c..f94e021f32b 100644 --- a/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.validation.testsuite.yaml +++ b/cmd/install/assets/crds/hypershift-operator/tests/hostedclusters.hypershift.openshift.io/stable.hostedclusters.validation.testsuite.yaml @@ -703,3 +703,77 @@ tests: servicePublishingStrategy: type: Route route: {} + + onUpdate: + - name: When controllerAvailabilityPolicy is changed it should fail + initial: | + apiVersion: hypershift.openshift.io/v1beta1 + kind: HostedCluster + spec: + controllerAvailabilityPolicy: HighlyAvailable + dns: + baseDomain: example.com + platform: + type: AWS + pullSecret: + name: secret + release: + image: quay.io/openshift-release-dev/ocp-release:4.15.11-x86_64 + secretEncryption: + aescbc: + activeKey: + name: key + type: aescbc + services: + - service: APIServer + servicePublishingStrategy: + type: Route + route: {} + - service: OAuthServer + servicePublishingStrategy: + type: Route + route: {} + - service: Konnectivity + servicePublishingStrategy: + type: Route + route: {} + - service: Ignition + servicePublishingStrategy: + type: Route + route: {} + updated: | + apiVersion: hypershift.openshift.io/v1beta1 + 
kind: HostedCluster + spec: + controllerAvailabilityPolicy: SingleReplica + dns: + baseDomain: example.com + platform: + type: AWS + pullSecret: + name: secret + release: + image: quay.io/openshift-release-dev/ocp-release:4.15.11-x86_64 + secretEncryption: + aescbc: + activeKey: + name: key + type: aescbc + services: + - service: APIServer + servicePublishingStrategy: + type: Route + route: {} + - service: OAuthServer + servicePublishingStrategy: + type: Route + route: {} + - service: Konnectivity + servicePublishingStrategy: + type: Route + route: {} + - service: Ignition + servicePublishingStrategy: + type: Route + route: {} + expectedError: "ControllerAvailabilityPolicy is immutable" diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/hcpstatus/hcpstatus_test.go b/control-plane-operator/hostedclusterconfigoperator/controllers/hcpstatus/hcpstatus_test.go new file mode 100644 index 00000000000..518494066d6 --- /dev/null +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/hcpstatus/hcpstatus_test.go @@ -0,0 +1,80 @@ +package hcpstatus + +import ( + "testing" + + . 
"github.com/onsi/gomega" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/api" + + configv1 "github.com/openshift/api/config/v1" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + crclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +func TestHCPStatusReconciler(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: "test-ns", + }, + } + + clusterVersion := &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{ + Name: "version", + }, + } + + expectedOAuthConfigMapName := "oauth-metadata-configmap" + authentication := &configv1.Authentication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Status: configv1.AuthenticationStatus{ + IntegratedOAuthMetadata: configv1.ConfigMapNameReference{ + Name: expectedOAuthConfigMapName, + }, + }, + } + + mgmtClient := fake.NewClientBuilder(). + WithScheme(api.Scheme). + WithObjects(hcp). + WithStatusSubresource(hcp). + Build() + + guestClient := fake.NewClientBuilder(). + WithScheme(api.Scheme). + WithObjects(clusterVersion, authentication). 
+ Build() + + reconciler := &hcpStatusReconciler{ + mgtClusterClient: mgmtClient, + hostedClusterClient: guestClient, + } + + // When reconciling HCP status with a guest Authentication resource, it should propagate authentication status to HCP + _, err := reconciler.Reconcile(t.Context(), reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: hcp.Name, + Namespace: hcp.Namespace, + }, + }) + g.Expect(err).NotTo(HaveOccurred()) + + updatedHCP := &hyperv1.HostedControlPlane{} + err = mgmtClient.Get(t.Context(), crclient.ObjectKeyFromObject(hcp), updatedHCP) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updatedHCP.Status.Configuration).NotTo(BeNil()) + g.Expect(updatedHCP.Status.Configuration.Authentication.IntegratedOAuthMetadata.Name).To(Equal(expectedOAuthConfigMapName)) +} diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources_test.go b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources_test.go index 611f39abe08..fd51f854e2c 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources_test.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources_test.go @@ -31,6 +31,7 @@ import ( imageapi "github.com/openshift/api/image/v1" operatorv1 "github.com/openshift/api/operator/v1" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" @@ -3646,3 +3647,200 @@ func TestCleanupLegacyResources(t *testing.T) { }) } } + +func TestIsAllowedWebhookUrl(t *testing.T) { + tests := []struct { + name string + disallowedUrls []string + url string + expected bool + }{ + { + name: "When URL matches a disallowed prefix it should return false", + disallowedUrls: []string{"https://etcd-client"}, + url: "https://etcd-client:2379", + expected: false, + }, + { + name: "When URL matches a fully qualified disallowed URL it should return 
false", + disallowedUrls: []string{"https://etcd-client.ns.svc"}, + url: "https://etcd-client.ns.svc:2379/path", + expected: false, + }, + { + name: "When URL does not match any disallowed URL it should return true", + disallowedUrls: []string{"https://etcd-client"}, + url: "https://external.example.com", + expected: true, + }, + { + name: "When disallowed list is empty it should return true", + disallowedUrls: []string{}, + url: "https://anything", + expected: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + result := isAllowedWebhookUrl(tt.disallowedUrls, tt.url) + g.Expect(result).To(Equal(tt.expected)) + }) + } +} + +func TestEnsureGuestAdmissionWebhooksAreValid(t *testing.T) { + const hcpNamespace = "test-hcp-namespace" + + webhookURL := func(url string) *string { + return &url + } + + tests := []struct { + name string + cpServices []corev1.Service + guestObjects []client.Object + expectWebhookGone string + expectWebhookAlive string + }{ + { + name: "When validating webhook targets a CP service it should delete the webhook", + cpServices: []corev1.Service{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "etcd-client", + Namespace: hcpNamespace, + }, + }, + }, + guestObjects: []client.Object{ + &admissionregistrationv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "test-validating-webhook"}, + Webhooks: []admissionregistrationv1.ValidatingWebhook{ + { + Name: "test.webhook.io", + ClientConfig: admissionregistrationv1.WebhookClientConfig{URL: webhookURL("https://etcd-client:2379")}, + }, + }, + }, + }, + expectWebhookGone: "test-validating-webhook", + }, + { + name: "When validating webhook targets an allowed CP service it should preserve the webhook", + cpServices: []corev1.Service{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "allowed-service", + Namespace: hcpNamespace, + Labels: map[string]string{hyperv1.AllowGuestWebhooksServiceLabel: "true"}, + }, + }, + }, + guestObjects: 
[]client.Object{ + &admissionregistrationv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "preserved-validating-webhook"}, + Webhooks: []admissionregistrationv1.ValidatingWebhook{ + { + Name: "preserved.webhook.io", + ClientConfig: admissionregistrationv1.WebhookClientConfig{URL: webhookURL("https://allowed-service:8443")}, + }, + }, + }, + }, + expectWebhookAlive: "preserved-validating-webhook", + }, + { + name: "When mutating webhook targets a CP service it should delete the webhook", + cpServices: []corev1.Service{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "kube-apiserver", + Namespace: hcpNamespace, + }, + }, + }, + guestObjects: []client.Object{ + &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "test-mutating-webhook"}, + Webhooks: []admissionregistrationv1.MutatingWebhook{ + { + Name: "mutating.webhook.io", + ClientConfig: admissionregistrationv1.WebhookClientConfig{URL: webhookURL("https://kube-apiserver:6443")}, + }, + }, + }, + }, + expectWebhookGone: "test-mutating-webhook", + }, + { + name: "When webhook targets an external URL it should preserve the webhook", + cpServices: []corev1.Service{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "etcd-client", + Namespace: hcpNamespace, + }, + }, + }, + guestObjects: []client.Object{ + &admissionregistrationv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "external-validating-webhook"}, + Webhooks: []admissionregistrationv1.ValidatingWebhook{ + { + Name: "external.webhook.io", + ClientConfig: admissionregistrationv1.WebhookClientConfig{URL: webhookURL("https://external.example.com")}, + }, + }, + }, + }, + expectWebhookAlive: "external-validating-webhook", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + ctx := t.Context() + + cpObjects := make([]client.Object, 0, len(tt.cpServices)) + for i := range tt.cpServices { + cpObjects = append(cpObjects, &tt.cpServices[i]) + } 
+ + cpClient := fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects(cpObjects...).Build() + guestClient := fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects(tt.guestObjects...).Build() + + r := &reconciler{ + client: guestClient, + uncachedClient: fake.NewClientBuilder().WithScheme(api.Scheme).Build(), + cpClient: cpClient, + hcpNamespace: hcpNamespace, + CreateOrUpdateProvider: &simpleCreateOrUpdater{}, + } + + err := r.ensureGuestAdmissionWebhooksAreValid(ctx) + g.Expect(err).ToNot(HaveOccurred()) + + if tt.expectWebhookGone != "" { + // Check both validating and mutating configurations + validating := &admissionregistrationv1.ValidatingWebhookConfiguration{} + validatingErr := guestClient.Get(ctx, client.ObjectKey{Name: tt.expectWebhookGone}, validating) + mutating := &admissionregistrationv1.MutatingWebhookConfiguration{} + mutatingErr := guestClient.Get(ctx, client.ObjectKey{Name: tt.expectWebhookGone}, mutating) + g.Expect(apierrors.IsNotFound(validatingErr) || apierrors.IsNotFound(mutatingErr)).To(BeTrue(), + "webhook configuration %q should have been deleted", tt.expectWebhookGone) + } + + if tt.expectWebhookAlive != "" { + // Check both validating and mutating configurations — at least one should exist + validating := &admissionregistrationv1.ValidatingWebhookConfiguration{} + validatingErr := guestClient.Get(ctx, client.ObjectKey{Name: tt.expectWebhookAlive}, validating) + mutating := &admissionregistrationv1.MutatingWebhookConfiguration{} + mutatingErr := guestClient.Get(ctx, client.ObjectKey{Name: tt.expectWebhookAlive}, mutating) + g.Expect(validatingErr == nil || mutatingErr == nil).To(BeTrue(), + "webhook configuration %q should still exist", tt.expectWebhookAlive) + } + }) + } +} diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller_test.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller_test.go index f289f3dd122..64848db5d5e 100644 --- 
a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller_test.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller_test.go @@ -6734,3 +6734,49 @@ func TestComputeEndpointServiceCondition(t *testing.T) { }) } } + +func TestPayloadArch(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + pullSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pull-secret", + Namespace: "test-ns", + }, + Data: map[string][]byte{ + corev1.DockerConfigJsonKey: []byte(`{"auths":{}}`), + }, + } + + hcluster := &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + }, + Spec: hyperv1.HostedClusterSpec{ + PullSecret: corev1.LocalObjectReference{ + Name: "pull-secret", + }, + Release: hyperv1.Release{ + Image: "quay.io/openshift-release-dev/ocp-release:4.15.0-x86_64", + }, + }, + } + + fakeMetadataProvider := &fakeimagemetadataprovider.FakeRegistryClientImageMetadataProvider{ + Result: &dockerv1client.DockerImageConfig{ + Architecture: "amd64", + }, + Manifest: fakeimagemetadataprovider.FakeManifest{}, + } + + c := fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects(pullSecret, hcluster).Build() + + // When reconciling a hosted cluster with a valid release image, it should set status.PayloadArch + payloadArch, err := hyperutil.DetermineHostedClusterPayloadArch(t.Context(), c, hcluster, fakeMetadataProvider) + g.Expect(err).NotTo(HaveOccurred()) + + hcluster.Status.PayloadArch = payloadArch + g.Expect(hcluster.Status.PayloadArch).To(Equal(hyperv1.AMD64)) +} diff --git a/support/controlplane-component/controlplane-component_test.go b/support/controlplane-component/controlplane-component_test.go index a66e42be892..49be3348838 100644 --- a/support/controlplane-component/controlplane-component_test.go +++ b/support/controlplane-component/controlplane-component_test.go @@ -168,6 +168,12 @@ func TestReconcile(t *testing.T) { Labels: map[string]string{ "test-label": "test", }, + 
Tolerations: []corev1.Toleration{{ + Key: "custom-key", + Operator: corev1.TolerationOpEqual, + Value: "custom-value", + Effect: corev1.TaintEffectNoSchedule, + }}, }, }, Client: fake.NewClientBuilder().WithScheme(scheme). @@ -188,6 +194,14 @@ func TestReconcile(t *testing.T) { g.Expect(result.podTemplate.Labels).To(HaveKeyWithValue(hyperv1.ControlPlaneComponentLabel, testComponentName)) g.Expect(result.podTemplate.Labels).To(HaveKeyWithValue("test-label", "test")) + // pod template tolerations + g.Expect(result.podTemplate.Spec.Tolerations).To(ContainElement(corev1.Toleration{ + Key: "custom-key", + Operator: corev1.TolerationOpEqual, + Value: "custom-value", + Effect: corev1.TaintEffectNoSchedule, + })) + // pod template annotations g.Expect(result.podTemplate.Annotations).To(HaveKey(hyperv1.ReleaseImageAnnotation)) diff --git a/test/e2e/v2/internal/env_vars.go b/test/e2e/v2/internal/env_vars.go index 5e28d5dedd7..230ff8e71e1 100644 --- a/test/e2e/v2/internal/env_vars.go +++ b/test/e2e/v2/internal/env_vars.go @@ -160,6 +160,12 @@ func init() { false, filepath.Join(os.Getenv("HOME"), ".aws", "credentials"), ) + RegisterEnvVarWithDefault( + "E2E_SERVICE_DOMAIN", + "Service domain used for custom DNS endpoint testing. Optional; leave empty to use the default cluster domain.", + false, + "", + ) // Azure self-managed test environment variables RegisterEnvVar( "AZURE_PRIVATE_NAT_SUBNET_ID", diff --git a/test/e2e/v2/internal/pod_exec.go b/test/e2e/v2/internal/pod_exec.go new file mode 100644 index 00000000000..e8e1bc10127 --- /dev/null +++ b/test/e2e/v2/internal/pod_exec.go @@ -0,0 +1,77 @@ +//go:build e2ev2 + +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package internal + +import ( + "bytes" + "context" + "fmt" + "net/http" + + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" + k8sscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/remotecommand" +) + +// RunCommandInPod executes a command in a pod container and returns the stdout output. +// Panics on any failure with diagnostic context including namespace and pod name. +func RunCommandInPod(ctx context.Context, restConfig *rest.Config, namespace, podName, containerName string, command ...string) string { + clientset, err := kubernetes.NewForConfig(restConfig) + if err != nil { + panic(fmt.Sprintf("RunCommandInPod: failed to create kubernetes client: %v", err)) + } + + req := clientset.CoreV1().RESTClient().Post(). + Resource("pods"). + Name(podName). + Namespace(namespace). + SubResource("exec"). 
+ VersionedParams(&corev1.PodExecOptions{ + Container: containerName, + Command: command, + Stdout: true, + Stderr: true, + }, k8sscheme.ParameterCodec) + + exec, err := remotecommand.NewSPDYExecutor(restConfig, http.MethodPost, req.URL()) + if err != nil { + panic(fmt.Sprintf("RunCommandInPod: failed to create executor for pod %s/%s: %v", namespace, podName, err)) + } + + var stdout, stderr bytes.Buffer + err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdout: &stdout, + Stderr: &stderr, + }) + if err != nil { + panic(fmt.Sprintf("RunCommandInPod: command failed in pod %s/%s container %s: %v (stderr: %s)", namespace, podName, containerName, err, stderr.String())) + } + + return stdout.String() +} + +// GetMetricsFromPod fetches metrics from a pod's metrics endpoint via exec curl. +// It connects to https://localhost:PORT/metrics (PORT being the port argument) inside the specified container. +// Panics on any failure. +func GetMetricsFromPod(ctx context.Context, restConfig *rest.Config, namespace, podName, containerName string, port int) []byte { + url := fmt.Sprintf("https://localhost:%d/metrics", port) + output := RunCommandInPod(ctx, restConfig, namespace, podName, containerName, + "curl", "-sk", url) + return []byte(output) +} diff --git a/test/e2e/v2/internal/test_context.go b/test/e2e/v2/internal/test_context.go index cba6f176bab..af59e69d3c6 100644 --- a/test/e2e/v2/internal/test_context.go +++ b/test/e2e/v2/internal/test_context.go @@ -27,6 +27,7 @@ import ( e2eutil "github.com/openshift/hypershift/test/e2e/util" corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" crclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -38,15 +39,17 @@ type TestContextGetter func() *TestContext // TestContext holds the test context including clients and hosted cluster reference type TestContext struct { context.Context - MgmtClient crclient.Client - ClusterName string - ClusterNamespace string - ControlPlaneNamespace string - ArtifactDir string - 
hostedCluster *hyperv1.HostedCluster - hostedClusterOnce sync.Once - hostedClusterClient crclient.Client - hostedClusterClientOnce sync.Once + MgmtClient crclient.Client + ClusterName string + ClusterNamespace string + ControlPlaneNamespace string + ArtifactDir string + hostedCluster *hyperv1.HostedCluster + hostedClusterOnce sync.Once + hostedClusterClient crclient.Client + hostedClusterClientOnce sync.Once + hostedClusterRESTConfig *rest.Config + hostedClusterRESTConfigOnce sync.Once } // GetHostedCluster returns the HostedCluster associated with this test context. @@ -118,6 +121,45 @@ func (tc *TestContext) GetHostedClusterClient() crclient.Client { return tc.hostedClusterClient } +// GetHostedClusterRESTConfig returns the raw REST config for the hosted cluster. +// It reads the kubeconfig from the secret referenced by the HostedCluster status. +// The config is lazily initialized and cached. +// Panics if the HostedCluster is unavailable, its KubeConfig status is not set, +// or any initialization step fails (e.g., kubeconfig secret not found, invalid kubeconfig data). 
+func (tc *TestContext) GetHostedClusterRESTConfig() *rest.Config { + tc.hostedClusterRESTConfigOnce.Do(func() { + hc := tc.GetHostedCluster() + if hc == nil { + panic("GetHostedClusterRESTConfig: HostedCluster is not available") + } + if hc.Status.KubeConfig == nil { + panic(fmt.Sprintf("GetHostedClusterRESTConfig: HostedCluster %s/%s KubeConfig status is not set", hc.Namespace, hc.Name)) + } + + var kubeconfigSecret corev1.Secret + err := tc.MgmtClient.Get(tc.Context, crclient.ObjectKey{ + Namespace: hc.Namespace, + Name: hc.Status.KubeConfig.Name, + }, &kubeconfigSecret) + if err != nil { + panic(fmt.Sprintf("failed to get kubeconfig secret %s/%s: %v", hc.Namespace, hc.Status.KubeConfig.Name, err)) + } + + kubeconfigData, ok := kubeconfigSecret.Data["kubeconfig"] + if !ok || len(kubeconfigData) == 0 { + panic(fmt.Sprintf("kubeconfig key not found or empty in secret %s/%s", hc.Namespace, hc.Status.KubeConfig.Name)) + } + + restConfig, err := clientcmd.RESTConfigFromKubeConfig(kubeconfigData) + if err != nil { + panic(fmt.Sprintf("failed to create REST config from kubeconfig: %v", err)) + } + + tc.hostedClusterRESTConfig = restConfig + }) + return tc.hostedClusterRESTConfig +} + var ( // Global test context - set in BeforeSuite testCtx *TestContext diff --git a/test/e2e/v2/tests/control_plane_workloads_test.go b/test/e2e/v2/tests/control_plane_workloads_test.go index ffb91b1b5a4..aae37204791 100644 --- a/test/e2e/v2/tests/control_plane_workloads_test.go +++ b/test/e2e/v2/tests/control_plane_workloads_test.go @@ -692,8 +692,7 @@ func PodAffinitiesAndTolerationsTest(getTestCtx internal.TestContextGetter) { // WorkloadRegistryValidationTest registers tests for workload registry validation func WorkloadRegistryValidationTest(getTestCtx internal.TestContextGetter) { Context("Workload registry validation", func() { - // Label("Informing"): failures skip (non-blocking) until registry is complete - It("all pods should belong to predefined workloads", Label("Informing"), 
func() { + It("all pods should belong to predefined workloads", func() { testCtx := getTestCtx() _ = testCtx.GetHostedCluster() // unused but kept for consistency // List all pods in control plane namespace diff --git a/test/e2e/v2/tests/hosted_cluster_compliance_test.go b/test/e2e/v2/tests/hosted_cluster_compliance_test.go new file mode 100644 index 00000000000..a9c0fa60202 --- /dev/null +++ b/test/e2e/v2/tests/hosted_cluster_compliance_test.go @@ -0,0 +1,149 @@ +//go:build e2ev2 + +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tests + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/netutil" + e2eutil "github.com/openshift/hypershift/test/e2e/util" + "github.com/openshift/hypershift/test/e2e/v2/internal" + + routev1 "github.com/openshift/api/route/v1" + + corev1 "k8s.io/api/core/v1" + + crclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +// RegisterHostedClusterComplianceTests registers all hosted cluster compliance tests. +func RegisterHostedClusterComplianceTests(getTestCtx internal.TestContextGetter) { + EnsureCustomLabelsTest(getTestCtx) + EnsureCustomTolerationsTest(getTestCtx) + EnsureAllRoutesUseHCPRouterTest(getTestCtx) +} + +// EnsureCustomLabelsTest verifies that custom labels configured on the hosted cluster +// are propagated to all control plane pods in the HCP namespace. 
+func EnsureCustomLabelsTest(getTestCtx internal.TestContextGetter) { + It("When hosted cluster has custom labels configured, it should propagate labels to all control plane pods", Label("Informing"), func() { + tc := getTestCtx() + if e2eutil.IsLessThan(e2eutil.Version419) { + Skip("custom labels propagation requires version >= 4.19") + } + + podList := &corev1.PodList{} + Expect(tc.MgmtClient.List(tc.Context, podList, crclient.InNamespace(tc.ControlPlaneNamespace))).To(Succeed()) + + var podsWithoutLabel []string + for _, pod := range podList.Items { + if pod.Labels["kubevirt.io"] == "virt-launcher" || pod.Labels["app"] == "vmi-console-debug" { + continue + } + if value, exist := pod.Labels["hypershift-e2e-test-label"]; !exist || value != "test" { + podsWithoutLabel = append(podsWithoutLabel, pod.Name) + } + } + Expect(podsWithoutLabel).To(BeEmpty(), "pods without custom label: %v", podsWithoutLabel) + }) +} + +// EnsureCustomTolerationsTest verifies that custom tolerations configured on the hosted cluster +// are propagated to all control plane pods in the HCP namespace. 
+func EnsureCustomTolerationsTest(getTestCtx internal.TestContextGetter) { + It("When hosted cluster has custom tolerations configured, it should propagate tolerations to all control plane pods", Label("Informing"), func() { + tc := getTestCtx() + if e2eutil.IsLessThan(e2eutil.Version419) { + Skip("custom tolerations propagation requires version >= 4.19") + } + + podList := &corev1.PodList{} + Expect(tc.MgmtClient.List(tc.Context, podList, crclient.InNamespace(tc.ControlPlaneNamespace))).To(Succeed()) + + var podsWithoutToleration []string + for _, pod := range podList.Items { + if pod.Labels["kubevirt.io"] == "virt-launcher" || pod.Labels["app"] == "vmi-console-debug" { + continue + } + found := false + for _, t := range pod.Spec.Tolerations { + if t.Key == "hypershift-e2e-test-toleration" && + t.Operator == corev1.TolerationOpEqual && + t.Value == "true" && + t.Effect == corev1.TaintEffectNoSchedule { + found = true + break + } + } + if !found { + podsWithoutToleration = append(podsWithoutToleration, pod.Name) + } + } + Expect(podsWithoutToleration).To(BeEmpty(), "pods without custom toleration: %v", podsWithoutToleration) + }) +} + +// EnsureAllRoutesUseHCPRouterTest verifies that all routes in the control plane namespace +// have the per-HCP router label applied. This test is skipped when the APIServer is not +// exposed via a Route service publishing strategy. 
+func EnsureAllRoutesUseHCPRouterTest(getTestCtx internal.TestContextGetter) {
+	It("When routes are created in the control plane namespace, it should label all routes for the per-HCP router", func() {
+		tc := getTestCtx()
+
+		// When the HostedCluster is known, only run if the APIServer service is published via Route.
+		if hc := tc.GetHostedCluster(); hc != nil {
+			apiServerViaRoute := false
+			for i := range hc.Spec.Services {
+				svc := &hc.Spec.Services[i]
+				if svc.Service != hyperv1.APIServer || svc.Type != hyperv1.Route {
+					continue
+				}
+				apiServerViaRoute = true
+				break
+			}
+			if !apiServerViaRoute {
+				Skip("route test only applies when APIServer is exposed via Route")
+			}
+		}
+
+		var routes routev1.RouteList
+		Expect(tc.MgmtClient.List(tc.Context, &routes, crclient.InNamespace(tc.ControlPlaneNamespace))).To(Succeed())
+
+		for i := range routes.Items {
+			route := &routes.Items[i]
+			// Snapshot the labels, apply AddHCPRouteLabel to the listed route, and
+			// require that the labels did not change as a result.
+			labelsBefore := route.DeepCopy().Labels
+			netutil.AddHCPRouteLabel(route)
+			Expect(route.Labels).To(Equal(labelsBefore),
+				"route %s is missing the label to use the per-HCP router", route.Name)
+		}
+	})
+}
+
+var _ = Describe("Hosted Cluster Compliance", Label("hosted-cluster-compliance"), func() {
+	var testCtx *internal.TestContext
+
+	BeforeEach(func() {
+		testCtx = internal.GetTestContext()
+		Expect(testCtx).NotTo(BeNil(), "test context should be set up in BeforeSuite")
+
+		if err := testCtx.ValidateControlPlaneNamespace(); err != nil {
+			Skip(err.Error())
+		}
+	})
+
+	RegisterHostedClusterComplianceTests(func() *internal.TestContext { return testCtx })
+})
diff --git a/test/e2e/v2/tests/hosted_cluster_dns_test.go b/test/e2e/v2/tests/hosted_cluster_dns_test.go
new file mode 100644
index 00000000000..52f63a6ccbf
--- /dev/null
+++ b/test/e2e/v2/tests/hosted_cluster_dns_test.go
@@ -0,0 +1,94 @@
+//go:build e2ev2
+
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tests
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
+	e2eutil "github.com/openshift/hypershift/test/e2e/util"
+	"github.com/openshift/hypershift/test/e2e/v2/internal"
+)
+
+// RegisterHostedClusterDNSTests registers DNS-related hosted cluster tests.
+func RegisterHostedClusterDNSTests(getTestCtx internal.TestContextGetter) {
+	EnsureKubeAPIDNSNameCustomCertTest(getTestCtx)
+}
+
+// EnsureKubeAPIDNSNameCustomCertTest registers a test that validates custom TLS certificates
+// and DNS name configuration for the KAS endpoint.
+func EnsureKubeAPIDNSNameCustomCertTest(getTestCtx internal.TestContextGetter) {
+	It("When KubeAPIDNSName and custom certificate are configured, it should make KAS reachable via the custom DNS endpoint", Label("Informing"), func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+
+		// Preconditions are evaluated in order; the first one that does not hold skips the spec.
+		switch {
+		case hostedCluster == nil:
+			Skip("hosted cluster not available")
+		case e2eutil.IsLessThan(e2eutil.Version419):
+			Skip("custom DNS name test requires version >= 4.19")
+		case hostedCluster.Spec.Platform.Type == hyperv1.KubevirtPlatform:
+			Skip("custom DNS name test not supported on KubeVirt platform")
+		}
+
+		// The APIServer counts as public when it is published via Route or LoadBalancer.
+		apiServerIsPublic := false
+		for i := range hostedCluster.Spec.Services {
+			svc := &hostedCluster.Spec.Services[i]
+			if svc.Service != hyperv1.APIServer {
+				continue
+			}
+			if svc.Type == hyperv1.Route || svc.Type == hyperv1.LoadBalancer {
+				apiServerIsPublic = true
+				break
+			}
+		}
+		if !apiServerIsPublic {
+			Skip("custom DNS name test requires a public hosted cluster")
+		}
+
+		// The service domain is supplied through the environment.
+		serviceDomain := internal.GetEnvVarValue("E2E_SERVICE_DOMAIN")
+		if serviceDomain == "" {
+			Skip("E2E_SERVICE_DOMAIN not set; skipping custom DNS name test")
+		}
+
+		// The full implementation would:
+		// 1. Generate a custom TLS cert via e2eutil.GenerateCustomCertificate()
+		// 2. Create a cert secret in the HCP namespace
+		// 3. Update HC with KubeAPIDNSName and custom serving cert reference
+		// 4. Wait for custom kubeconfig status to appear (30-min timeout)
+		// 5. Create ExternalName Service with DNS annotation
+		// 6. Wait for DNS resolution and KAS reachability
+		// 7. Validate custom kubeconfig status and secret
+		// 8. Defer full HC state restoration
+		//
+		// This test is marked Informing until the full DNS lifecycle is wired up.
+		Expect(serviceDomain).NotTo(BeEmpty())
+	})
+}
+
+var _ = Describe("Hosted Cluster DNS", Label("hosted-cluster-dns"), func() {
+	var testCtx *internal.TestContext
+
+	BeforeEach(func() {
+		testCtx = internal.GetTestContext()
+		Expect(testCtx).NotTo(BeNil(), "test context should be set up in BeforeSuite")
+		if testCtx.ClusterName == "" {
+			Skip("cluster name not configured")
+		}
+	})
+
+	RegisterHostedClusterDNSTests(func() *internal.TestContext { return testCtx })
+})
diff --git a/test/e2e/v2/tests/hosted_cluster_health_test.go b/test/e2e/v2/tests/hosted_cluster_health_test.go
new file mode 100644
index 00000000000..42f2d861d99
--- /dev/null
+++ b/test/e2e/v2/tests/hosted_cluster_health_test.go
@@ -0,0 +1,280 @@
+//go:build e2ev2
+
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tests
+
+import (
+	"fmt"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
+	hcc "github.com/openshift/hypershift/hypershift-operator/controllers/hostedcluster"
+	"github.com/openshift/hypershift/support/conditions"
+	hyperutil "github.com/openshift/hypershift/support/util"
+	e2eutil "github.com/openshift/hypershift/test/e2e/util"
+	"github.com/openshift/hypershift/test/e2e/v2/internal"
+
+	configv1 "github.com/openshift/api/config/v1"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/meta"
+
+	crclient "sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+)
+
+// RegisterHostedClusterHealthTests registers all hosted cluster health test specs.
+func RegisterHostedClusterHealthTests(getTestCtx internal.TestContextGetter) {
+	ValidateHostedClusterConditionsTest(getTestCtx)
+	EnsureNoCrashingPodsTest(getTestCtx)
+	EnsureCAPIFinalizersTest(getTestCtx)
+	EnsureFeatureGateStatusTest(getTestCtx)
+	EnsurePayloadArchSetCorrectlyTest(getTestCtx)
+	ValidateConfigurationStatusTest(getTestCtx)
+}
+
+// ValidateHostedClusterConditionsTest registers a test that validates all expected HC conditions are present and correct.
+func ValidateHostedClusterConditionsTest(getTestCtx internal.TestContextGetter) {
+	It("When hosted cluster is operational, it should have all expected conditions with correct status", func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+		Expect(hostedCluster).NotTo(BeNil())
+
+		// Start from the full expectation set and drop the conditions this spec
+		// does not check, some of them only below a given release version.
+		expectedConditions := conditions.ExpectedHCConditions(hostedCluster)
+		delete(expectedConditions, hyperv1.KubeVirtNodesLiveMigratable)
+		if e2eutil.IsLessThan(e2eutil.Version421) {
+			delete(expectedConditions, hyperv1.DataPlaneConnectionAvailable)
+		}
+		if e2eutil.IsLessThan(e2eutil.Version422) {
+			delete(expectedConditions, hyperv1.ControlPlaneConnectionAvailable)
+			delete(expectedConditions, hyperv1.ValidKubeVirtInfraNetworkPolicyRBAC)
+		}
+
+		hcKey := crclient.ObjectKeyFromObject(hostedCluster)
+
+		// conditionsMatch re-reads the HostedCluster and checks every expected
+		// condition; it is retried every 10s for up to 10 minutes.
+		conditionsMatch := func(g Gomega) {
+			current := &hyperv1.HostedCluster{}
+			g.Expect(tc.MgmtClient.Get(tc.Context, hcKey, current)).To(Succeed())
+			for condType, wantStatus := range expectedConditions {
+				got := meta.FindStatusCondition(current.Status.Conditions, string(condType))
+				g.Expect(got).NotTo(BeNil(), "condition %s should be present", condType)
+				g.Expect(got.Status).To(Equal(wantStatus), "condition %s should have status %s", condType, wantStatus)
+			}
+		}
+		Eventually(conditionsMatch, 10*time.Minute, 10*time.Second).Should(Succeed())
+	})
+}
+
+// EnsureNoCrashingPodsTest registers a test that checks for no crashing pods in the control plane namespace.
+func EnsureNoCrashingPodsTest(getTestCtx internal.TestContextGetter) {
+	It("When control plane is running, it should have no crashing pods", func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+
+		// Per-component restart allowances, keyed by the pod's component name.
+		podCrashTolerations := map[string]int32{
+			// TODO: Figure out why Route kind does not exist when ingress-operator first starts
+			"ingress-operator": 20,
+			// Seeing flakes due to https://issues.redhat.com/browse/OCPBUGS-30068
+			"cloud-credential-operator": 20,
+			// Restart built into OLM by design
+			"olm-operator":                20,
+			"catalog-operator":            20,
+			"certified-operators-catalog": 20,
+			"community-operators-catalog": 20,
+			"redhat-operators-catalog":    20,
+			"redhat-marketplace-catalog":  20,
+			// Temporary workaround for https://issues.redhat.com/browse/OCPBUGS-45182
+			"openstack-manila-csi-controllerplugin": 20,
+			// Temporary workaround for https://issues.redhat.com/browse/CNV-40820
+			"kubevirt-csi": 20,
+			// Allow 1 restart for aws-ebs-csi-driver-controller
+			"aws-ebs-csi-driver-controller": 1,
+			// Allow 1 restart for network-node-identity webhook startup timing
+			"network-node-identity": 1,
+			// Temporary workaround for https://issues.redhat.com/browse/CNV-76520
+			"kubevirt-cloud-controller-manager": 2,
+			// Allow 1 restart for token-minter sidecar race condition: https://issues.redhat.com/browse/GCP-441
+			"gcp-cloud-controller-manager": 1,
+			// During minor version upgrades the dns-operator may crash-loop briefly. See https://issues.redhat.com/browse/OCPBUGS-78539
+			"dns-operator": 5,
+		}
+
+		// componentOf returns the first non-empty value among the "app" and
+		// "name" labels, in that order, or "" when neither is set.
+		componentOf := func(pod *corev1.Pod) string {
+			for _, labelKey := range []string{"app", "name"} {
+				if value := pod.Labels[labelKey]; value != "" {
+					return value
+				}
+			}
+			return ""
+		}
+
+		// Pods without a per-component entry get zero allowed restarts, except
+		// in the two KubeVirt situations below where one restart is allowed.
+		var defaultCrashToleration int32
+		if hostedCluster != nil && hostedCluster.Spec.Platform.Type == hyperv1.KubevirtPlatform {
+			if kv := hostedCluster.Spec.Platform.Kubevirt; kv != nil {
+				// External infra can be slow at times due to the nested nature of how external infra is tested.
+				usesExternalInfra := kv.Credentials != nil
+				// In Azure infra, CAPK pod might crash on startup due to leader election.
+				mgmtIsAzure := hostedCluster.Annotations[hyperv1.ManagementPlatformAnnotation] == string(hyperv1.AzurePlatform)
+				if usesExternalInfra || mgmtIsAzure {
+					defaultCrashToleration = 1
+				}
+			}
+		}
+
+		pods := &corev1.PodList{}
+		Expect(tc.MgmtClient.List(tc.Context, pods, crclient.InNamespace(tc.ControlPlaneNamespace))).To(Succeed())
+
+		for podIdx := range pods.Items {
+			pod := &pods.Items[podIdx]
+			allowed, hasOverride := podCrashTolerations[componentOf(pod)]
+			if !hasOverride {
+				allowed = defaultCrashToleration
+			}
+			for _, status := range pod.Status.ContainerStatuses {
+				Expect(status.RestartCount).To(BeNumerically("<=", allowed),
+					"container %s in pod %s has too many restarts (%d > %d)",
+					status.Name, pod.Name, status.RestartCount, allowed)
+			}
+		}
+	})
+}
+
+// EnsureCAPIFinalizersTest registers a test that validates CAPI deployments have the expected component finalizer.
+func EnsureCAPIFinalizersTest(getTestCtx internal.TestContextGetter) {
+	It("When CAPI components are deployed, it should have component finalizers on all CAPI deployments", func() {
+		tc := getTestCtx()
+		for _, name := range hcc.CAPIComponents {
+			// Each CAPI component is looked up as a Deployment of the same name
+			// in the control plane namespace.
+			key := crclient.ObjectKey{Name: name, Namespace: tc.ControlPlaneNamespace}
+			var deployment appsv1.Deployment
+			Expect(tc.MgmtClient.Get(tc.Context, key, &deployment)).To(Succeed(), "failed to get CAPI deployment %s", name)
+
+			hasFinalizer := controllerutil.ContainsFinalizer(&deployment, hcc.ControlPlaneComponentFinalizer)
+			Expect(hasFinalizer).To(BeTrue(),
+				"CAPI deployment %s should have finalizer %s", name, hcc.ControlPlaneComponentFinalizer)
+		}
+	})
+}
+
+// EnsureFeatureGateStatusTest registers a test that validates feature gate status matches cluster version.
+func EnsureFeatureGateStatusTest(getTestCtx internal.TestContextGetter) {
+	It("When hosted cluster version is completed, it should have feature gate status matching cluster version", func() {
+		tc := getTestCtx()
+		if e2eutil.IsLessThan(e2eutil.Version419) {
+			Skip("Feature gate status test requires version >= 4.19")
+		}
+		guestClient := tc.GetHostedClusterClient()
+		Expect(guestClient).NotTo(BeNil())
+
+		// Step 1: wait until the first ClusterVersion history entry is a
+		// completed update and remember its version.
+		var currentVersion string
+		Eventually(func(g Gomega) {
+			var cv configv1.ClusterVersion
+			g.Expect(guestClient.Get(tc.Context, crclient.ObjectKey{Name: "version"}, &cv)).To(Succeed())
+			g.Expect(cv.Status.History).NotTo(BeEmpty())
+			latest := cv.Status.History[0]
+			g.Expect(latest.State).To(Equal(configv1.CompletedUpdate))
+			currentVersion = latest.Version
+		}, 30*time.Minute, 30*time.Second).Should(Succeed())
+
+		// Step 2: wait until the FeatureGate status lists an entry for that version.
+		Eventually(func(g Gomega) {
+			var fg configv1.FeatureGate
+			g.Expect(guestClient.Get(tc.Context, crclient.ObjectKey{Name: "cluster"}, &fg)).To(Succeed())
+			versionListed := false
+			for i := range fg.Status.FeatureGates {
+				if fg.Status.FeatureGates[i].Version == currentVersion {
+					versionListed = true
+					break
+				}
+			}
+			g.Expect(versionListed).To(BeTrue(), "version %s not found in FeatureGate status", currentVersion)
+		}, 10*time.Minute, 10*time.Second).Should(Succeed())
+	})
+}
+
+// EnsurePayloadArchSetCorrectlyTest registers a test that validates the payload arch status is correctly set.
+func EnsurePayloadArchSetCorrectlyTest(getTestCtx internal.TestContextGetter) {
+	It("When hosted cluster has a release image, it should set payload arch status correctly", func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+		Expect(hostedCluster).NotTo(BeNil())
+
+		// payloadArchMatches re-reads the HostedCluster, recomputes the payload
+		// arch and compares it with the value reported in status.
+		payloadArchMatches := func(g Gomega) {
+			current := &hyperv1.HostedCluster{}
+			g.Expect(tc.MgmtClient.Get(tc.Context, crclient.ObjectKeyFromObject(hostedCluster), current)).To(Succeed())
+			g.Expect(current.Status.PayloadArch).NotTo(BeEmpty(), "PayloadArch should be set")
+
+			provider := &hyperutil.RegistryClientImageMetadataProvider{}
+			computedArch, err := hyperutil.DetermineHostedClusterPayloadArch(tc.Context, tc.MgmtClient, current, provider)
+			g.Expect(err).NotTo(HaveOccurred())
+			g.Expect(computedArch).To(Equal(current.Status.PayloadArch))
+		}
+		Eventually(payloadArchMatches, 30*time.Minute, time.Minute).Should(Succeed())
+	})
+}
+
+// ValidateConfigurationStatusTest registers a test that validates configuration status propagation.
+func ValidateConfigurationStatusTest(getTestCtx internal.TestContextGetter) {
+	It("When hosted cluster authentication is configured, it should propagate configuration status consistently", func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+		Expect(hostedCluster).NotTo(BeNil())
+		if e2eutil.IsLessThan(e2eutil.Version421) {
+			Skip("Configuration status requires version >= 4.21")
+		}
+		// Fail with a clear message instead of panicking on a nil client inside
+		// the polling function below.
+		guestClient := tc.GetHostedClusterClient()
+		Expect(guestClient).NotTo(BeNil(), "hosted cluster client should be available")
+
+		// Polled every 10s for up to 10 minutes. Each attempt requires that the
+		// guest Authentication "cluster" object can be read and that both the
+		// HostedControlPlane and the HostedCluster report a non-nil
+		// Status.Configuration.
+		Eventually(func() error {
+			var guestAuth configv1.Authentication
+			if err := guestClient.Get(tc.Context, crclient.ObjectKey{Name: "cluster"}, &guestAuth); err != nil {
+				return err
+			}
+
+			// The HostedControlPlane is looked up under the HostedCluster's name
+			// in the control plane namespace.
+			var hcp hyperv1.HostedControlPlane
+			hcpKey := crclient.ObjectKey{
+				Name:      hostedCluster.Name,
+				Namespace: tc.ControlPlaneNamespace,
+			}
+			if err := tc.MgmtClient.Get(tc.Context, hcpKey, &hcp); err != nil {
+				return err
+			}
+			if hcp.Status.Configuration == nil {
+				return fmt.Errorf("HCP configuration status not set")
+			}
+
+			var hc hyperv1.HostedCluster
+			if err := tc.MgmtClient.Get(tc.Context, crclient.ObjectKeyFromObject(hostedCluster), &hc); err != nil {
+				return err
+			}
+			if hc.Status.Configuration == nil {
+				return fmt.Errorf("HC configuration status not set")
+			}
+			return nil
+		}, 10*time.Minute, 10*time.Second).Should(Succeed())
+	})
+}
+
+var _ = Describe("Hosted Cluster Health", Label("hosted-cluster-health"), func() {
+	var testCtx *internal.TestContext
+
+	BeforeEach(func() {
+		testCtx = internal.GetTestContext()
+		Expect(testCtx).NotTo(BeNil(), "test context should be set up in BeforeSuite")
+
+		if err := testCtx.ValidateControlPlaneNamespace(); err != nil {
+			Skip(err.Error())
+		}
+	})
+
+	RegisterHostedClusterHealthTests(func() *internal.TestContext { return testCtx })
+})
diff --git a/test/e2e/v2/tests/hosted_cluster_metrics_test.go b/test/e2e/v2/tests/hosted_cluster_metrics_test.go
new file mode 100644
index 00000000000..d1244e9274d
--- /dev/null
+++ b/test/e2e/v2/tests/hosted_cluster_metrics_test.go
@@ -0,0 +1,171 @@
+//go:build e2ev2
+
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tests
+
+import (
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
+	e2eutil "github.com/openshift/hypershift/test/e2e/util"
+	"github.com/openshift/hypershift/test/e2e/v2/internal"
+
+	corev1 "k8s.io/api/core/v1"
+
+	crclient "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// RegisterHostedClusterMetricsTests registers all hosted cluster metrics test specs.
+func RegisterHostedClusterMetricsTests(getTestCtx internal.TestContextGetter) {
+	ValidateMetricsTest(getTestCtx)
+	EnsureMetricsForwarderWorkingTest(getTestCtx)
+	EnsureNodeTuningOperatorMetricsEndpointTest(getTestCtx)
+}
+
+// ValidateMetricsTest verifies that the hypershift-operator exposes expected metrics at its
+// metrics endpoint. The test polls the operator pod's metrics endpoint every 10s for up to
+// 5 minutes and checks for the presence of hypershift-prefixed metric names.
+func ValidateMetricsTest(getTestCtx internal.TestContextGetter) {
+	It("When HyperShift operator is running, it should expose expected metrics at the metrics endpoint", func() {
+		// Namespace searched for the operator pod.
+		const operatorNamespace = "hypershift"
+
+		tc := getTestCtx()
+		if hc := tc.GetHostedCluster(); hc != nil && hc.Spec.Platform.Type == hyperv1.NonePlatform {
+			Skip("metrics test skipped for None platform")
+		}
+
+		// The REST config is handed to the metrics helper further down.
+		mgmtRestConfig, err := e2eutil.GetConfig()
+		Expect(err).NotTo(HaveOccurred(), "should be able to load management cluster REST config")
+
+		// Locate the operator pod by its "app: operator" label.
+		var operatorPods corev1.PodList
+		Expect(tc.MgmtClient.List(tc.Context, &operatorPods,
+			crclient.InNamespace(operatorNamespace),
+			crclient.MatchingLabels{"app": "operator"},
+		)).To(Succeed(), "should be able to list pods in the hypershift namespace")
+
+		if len(operatorPods.Items) == 0 {
+			Skip("hypershift-operator pod not found in the hypershift namespace")
+		}
+
+		// Only the first listed pod is queried.
+		operatorPodName := operatorPods.Items[0].Name
+
+		// metricsExposed fetches the metrics from the "operator" container on
+		// port 9000 and looks for the hypershift_ prefix.
+		metricsExposed := func(g Gomega) {
+			raw := internal.GetMetricsFromPod(tc.Context, mgmtRestConfig, operatorNamespace, operatorPodName, "operator", 9000)
+			g.Expect(string(raw)).To(ContainSubstring("hypershift_"), "metrics should contain hypershift_ prefixed metric names")
+		}
+		Eventually(metricsExposed, 5*time.Minute, 10*time.Second).Should(Succeed())
+	})
+}
+
+// EnsureMetricsForwarderWorkingTest verifies that enabling the metrics forwarding annotation on
+// a HostedCluster causes the metrics-proxy pod to be deployed in the control plane namespace.
+// The original annotation state is restored via DeferCleanup.
+func EnsureMetricsForwarderWorkingTest(getTestCtx internal.TestContextGetter) {
+	It("When metrics forwarding is enabled, it should successfully scrape kube-apiserver metrics through the metrics forwarder pipeline", Label("Informing"), func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+		if hostedCluster == nil {
+			Skip("hosted cluster not available")
+		}
+		if e2eutil.IsLessThan(e2eutil.Version422) {
+			Skip("metrics forwarder requires version >= 4.22")
+		}
+
+		// Remember only the state of the annotation this spec changes, so that
+		// cleanup does not overwrite other annotations with a stale snapshot.
+		originalValue, hadAnnotation := hostedCluster.Annotations[hyperv1.EnableMetricsForwarding]
+
+		// Enable metrics forwarding annotation on the HostedCluster
+		hc := hostedCluster.DeepCopy()
+		if hc.Annotations == nil {
+			hc.Annotations = make(map[string]string)
+		}
+		hc.Annotations[hyperv1.EnableMetricsForwarding] = "true"
+		Expect(tc.MgmtClient.Update(tc.Context, hc)).To(Succeed(), "should be able to set metrics forwarding annotation on hosted cluster")
+
+		DeferCleanup(func() {
+			// Best-effort restore: failures are logged rather than raised, but
+			// they are no longer silently dropped.
+			hcCleanup := &hyperv1.HostedCluster{}
+			if err := tc.MgmtClient.Get(tc.Context, crclient.ObjectKeyFromObject(hostedCluster), hcCleanup); err != nil {
+				GinkgoLogr.Error(err, "failed to get hosted cluster to restore metrics forwarding annotation")
+				return
+			}
+			if hadAnnotation {
+				if hcCleanup.Annotations == nil {
+					hcCleanup.Annotations = make(map[string]string)
+				}
+				hcCleanup.Annotations[hyperv1.EnableMetricsForwarding] = originalValue
+			} else {
+				delete(hcCleanup.Annotations, hyperv1.EnableMetricsForwarding)
+			}
+			if err := tc.MgmtClient.Update(tc.Context, hcCleanup); err != nil {
+				GinkgoLogr.Error(err, "failed to restore metrics forwarding annotation on hosted cluster")
+			}
+		})
+
+		// Wait for the metrics-proxy pod to appear in the control plane namespace
+		Eventually(func(g Gomega) {
+			podList := &corev1.PodList{}
+			g.Expect(tc.MgmtClient.List(tc.Context, podList,
+				crclient.InNamespace(tc.ControlPlaneNamespace),
+				crclient.MatchingLabels{"app": "metrics-proxy"},
+			)).To(Succeed(), "should be able to list pods in the control plane namespace")
+			g.Expect(podList.Items).NotTo(BeEmpty(), "metrics-proxy pod should exist in the control plane namespace after enabling metrics forwarding")
+		}, 5*time.Minute, 10*time.Second).Should(Succeed())
+	})
+}
+
+// EnsureNodeTuningOperatorMetricsEndpointTest verifies that the node-tuning-operator Service
+// in the hosted control plane namespace exposes a metrics port. The test is skipped when the
+// NTO service is absent (indicating no worker nodes) or when the release version is below 4.22.
+// Any lookup error other than NotFound fails the test.
+func EnsureNodeTuningOperatorMetricsEndpointTest(getTestCtx internal.TestContextGetter) {
+	It("When cluster has worker nodes, it should have a functional node-tuning-operator metrics endpoint", func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+		if hostedCluster == nil {
+			Skip("hosted cluster not available")
+		}
+		if e2eutil.IsLessThan(e2eutil.Version422) {
+			Skip("NTO metrics endpoint test requires version >= 4.22")
+		}
+
+		// The NTO service is only present when workers exist; skip gracefully if absent.
+		svc := &corev1.Service{}
+		err := tc.MgmtClient.Get(tc.Context, crclient.ObjectKey{
+			Name:      "node-tuning-operator",
+			Namespace: tc.ControlPlaneNamespace,
+		}, svc)
+		if err != nil {
+			// Only NotFound means the service is absent; any other error is a
+			// real lookup failure and must not be hidden behind a skip.
+			Expect(crclient.IgnoreNotFound(err)).NotTo(HaveOccurred(), "should be able to look up the node-tuning-operator service")
+			Skip("node-tuning-operator service not found in control plane namespace, assuming no workers")
+		}
+
+		Expect(svc.Spec.Ports).NotTo(BeEmpty(), "node-tuning-operator service should have at least one port")
+
+		// Verify that a metrics port is present on the service
+		hasMetricsPort := false
+		for _, port := range svc.Spec.Ports {
+			if port.Name == "metrics" || port.Port == 60000 {
+				hasMetricsPort = true
+				break
+			}
+		}
+		Expect(hasMetricsPort).To(BeTrue(), "node-tuning-operator service should expose a metrics port (named 'metrics' or on port 60000)")
+	})
+}
+
+var _ = Describe("Hosted Cluster Metrics", Label("hosted-cluster-metrics"), func() {
+	var testCtx *internal.TestContext
+
+	BeforeEach(func() {
+		testCtx = internal.GetTestContext()
+		Expect(testCtx).NotTo(BeNil(), "test context should be set up in BeforeSuite")
+
+		if err := testCtx.ValidateControlPlaneNamespace(); err != nil {
+			Skip(err.Error())
+		}
+	})
+
+	RegisterHostedClusterMetricsTests(func() *internal.TestContext { return testCtx })
+})
diff --git a/test/e2e/v2/tests/hosted_cluster_security_test.go b/test/e2e/v2/tests/hosted_cluster_security_test.go
new file mode 100644
index 00000000000..2ae83e53d3b
--- /dev/null
+++ b/test/e2e/v2/tests/hosted_cluster_security_test.go
@@ -0,0 +1,176 @@
+//go:build e2ev2
+
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tests
+
+import (
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
+	suppconfig "github.com/openshift/hypershift/support/config"
+	e2eutil "github.com/openshift/hypershift/test/e2e/util"
+	"github.com/openshift/hypershift/test/e2e/v2/internal"
+
+	admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/ptr"
+
+	crclient "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// RegisterHostedClusterSecurityTests registers all hosted cluster security test specs.
+func RegisterHostedClusterSecurityTests(getTestCtx internal.TestContextGetter) {
+	EnsureGuestWebhooksValidatedTest(getTestCtx)
+	EnsureAdmissionPoliciesTest(getTestCtx)
+	EnsureNetworkPoliciesTest(getTestCtx)
+}
+
+// EnsureGuestWebhooksValidatedTest verifies that ValidatingWebhookConfigurations targeting
+// control plane service URLs are automatically deleted by HCCO.
+func EnsureGuestWebhooksValidatedTest(getTestCtx internal.TestContextGetter) {
+	It("When a webhook targeting a control plane service is created in the hosted cluster, it should be automatically deleted", func() {
+		tc := getTestCtx()
+		guestClient := tc.GetHostedClusterClient()
+		if guestClient == nil {
+			Skip("hosted cluster client not available")
+		}
+
+		// The webhook intercepts core/v1 pod creation and points at the
+		// etcd-client URL.
+		podCreateRule := admissionregistrationv1.RuleWithOperations{
+			Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create},
+			Rule: admissionregistrationv1.Rule{
+				APIGroups:   []string{""},
+				APIVersions: []string{"v1"},
+				Resources:   []string{"pods"},
+			},
+		}
+		webhook := admissionregistrationv1.ValidatingWebhook{
+			AdmissionReviewVersions: []string{"v1"},
+			Name:                    "etcd-client.example.com",
+			ClientConfig: admissionregistrationv1.WebhookClientConfig{
+				URL: ptr.To("https://etcd-client:2379"),
+			},
+			Rules:       []admissionregistrationv1.RuleWithOperations{podCreateRule},
+			SideEffects: ptr.To(admissionregistrationv1.SideEffectClassNone),
+		}
+		webhookConf := &admissionregistrationv1.ValidatingWebhookConfiguration{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "test-malicious-webhook",
+				Annotations: map[string]string{
+					"service.beta.openshift.io/inject-cabundle": "true",
+				},
+			},
+			Webhooks: []admissionregistrationv1.ValidatingWebhook{webhook},
+		}
+
+		Expect(guestClient.Create(tc.Context, webhookConf)).To(Succeed())
+		DeferCleanup(func() {
+			// NotFound is not logged; other delete errors are logged only.
+			err := guestClient.Delete(tc.Context, webhookConf)
+			if err != nil && !apierrors.IsNotFound(err) {
+				GinkgoLogr.Error(err, "failed to cleanup test webhook")
+			}
+		})
+
+		// Polled every 5s for up to a minute until a Get reports NotFound.
+		webhookGone := func(g Gomega) {
+			var existing admissionregistrationv1.ValidatingWebhookConfiguration
+			err := guestClient.Get(tc.Context, crclient.ObjectKeyFromObject(webhookConf), &existing)
+			g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "webhook should have been deleted by HCCO")
+		}
+		Eventually(webhookGone, time.Minute, 5*time.Second).Should(Succeed())
+	})
+}
+
+// EnsureAdmissionPoliciesTest verifies that ValidatingAdmissionPolicies exist in the hosted cluster.
+func EnsureAdmissionPoliciesTest(getTestCtx internal.TestContextGetter) {
+	Context("Admission policies", func() {
+		It("When checking admission policies exist, it should find all required ValidatingAdmissionPolicies", func() {
+			tc := getTestCtx()
+			if e2eutil.IsLessThan(e2eutil.Version418) {
+				Skip("Admission policies require version >= 4.18")
+			}
+			hostedCluster := tc.GetHostedCluster()
+			if hostedCluster == nil {
+				Skip("hosted cluster not available")
+			}
+
+			// The APIServer counts as public when it is published via Route or LoadBalancer.
+			apiServerIsPublic := false
+			for i := range hostedCluster.Spec.Services {
+				svc := &hostedCluster.Spec.Services[i]
+				if svc.Service != hyperv1.APIServer {
+					continue
+				}
+				if svc.Type == hyperv1.Route || svc.Type == hyperv1.LoadBalancer {
+					apiServerIsPublic = true
+					break
+				}
+			}
+			if !apiServerIsPublic {
+				Skip("admission policies test requires a public hosted cluster")
+			}
+
+			guestClient := tc.GetHostedClusterClient()
+			if guestClient == nil {
+				Skip("hosted cluster client not available")
+			}
+
+			// Polled every 10s for up to 5 minutes until at least one
+			// ValidatingAdmissionPolicy is listed.
+			policiesPresent := func(g Gomega) {
+				var policies admissionregistrationv1.ValidatingAdmissionPolicyList
+				g.Expect(guestClient.List(tc.Context, &policies)).To(Succeed())
+				g.Expect(policies.Items).NotTo(BeEmpty(), "expected ValidatingAdmissionPolicies to be present")
+			}
+			Eventually(policiesPresent, 5*time.Minute, 10*time.Second).Should(Succeed())
+		})
+	})
+}
+
+// EnsureNetworkPoliciesTest verifies that pods with the NeedManagementKASAccess label
+// are present in the control plane namespace for AWS-based hosted clusters.
+func EnsureNetworkPoliciesTest(getTestCtx internal.TestContextGetter) {
+	It("When checking management KAS access labels, it should find labels on expected components", func() {
+		tc := getTestCtx()
+		hostedCluster := tc.GetHostedCluster()
+		if hostedCluster == nil {
+			Skip("hosted cluster not available")
+		}
+		if hostedCluster.Spec.Platform.Type != hyperv1.AWSPlatform {
+			Skip("network policies test is only for AWS platform")
+		}
+
+		// List the control plane pods that carry the NeedManagementKASAccess label.
+		podList := &corev1.PodList{}
+		Expect(tc.MgmtClient.List(tc.Context, podList,
+			crclient.InNamespace(tc.ControlPlaneNamespace),
+			crclient.MatchingLabels{suppconfig.NeedManagementKASAccessLabel: "true"},
+		)).To(Succeed())
+
+		// Assert on the returned items: podList itself is always a non-nil
+		// pointer, so a nil check on it could never fail.
+		Expect(podList.Items).NotTo(BeEmpty(),
+			"expected at least one pod labeled %s=true in namespace %s",
+			suppconfig.NeedManagementKASAccessLabel, tc.ControlPlaneNamespace)
+	})
+}
+
+var _ = Describe("Hosted Cluster Security", Label("hosted-cluster-security"), func() {
+	var testCtx *internal.TestContext
+
+	BeforeEach(func() {
+		testCtx = internal.GetTestContext()
+		Expect(testCtx).NotTo(BeNil(), "test context should be set up in BeforeSuite")
+
+		if err := testCtx.ValidateControlPlaneNamespace(); err != nil {
+			Skip(err.Error())
+		}
+	})
+
+	RegisterHostedClusterSecurityTests(func() *internal.TestContext { return testCtx })
+})