From 84c626c2962dae16c6c8e29f48740f29d90de9d9 Mon Sep 17 00:00:00 2001 From: enxebre Date: Wed, 27 May 2026 15:22:08 +0200 Subject: [PATCH] fix(webhook): bootstrap serving certs at hypershift operator startup Cherry-pick of #8545 to release-4.22. The webhook server requires TLS certs on disk before it can start listening. Previously, certs were only created via `hypershift install` manifests, meaning the operator would fail to start if the serving cert secret was not yet present like it is the case when running `hypershift install render` without the `--render-sensitive` flag. By adding bootstrap cert generation at startup, the operator is self-sufficient: if the secret exists, the volume mount delivers certs normally; if it is missing or empty, certs are generated, persisted, and written to disk. This also removes the CABundle from CLI-generated webhook configurations and CRD conversion webhooks, since certs are now managed at runtime. Signed-off-by: Alberto Garcia Lamela Co-Authored-By: Claude Opus 4.6 --- cmd/install/assets/hypershift_operator.go | 15 ++- cmd/install/install.go | 20 +--- cmd/install/install_test.go | 2 +- .../webhookcerts/webhookcerts_controller.go | 58 ++++++++++ .../webhookcerts_controller_test.go | 106 ++++++++++++++++++ hypershift-operator/main.go | 7 ++ 6 files changed, 182 insertions(+), 26 deletions(-) diff --git a/cmd/install/assets/hypershift_operator.go b/cmd/install/assets/hypershift_operator.go index 711617609b8..048063eda8b 100644 --- a/cmd/install/assets/hypershift_operator.go +++ b/cmd/install/assets/hypershift_operator.go @@ -614,6 +614,7 @@ func (o HyperShiftOperatorDeployment) Build() *appsv1.Deployment { VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: "manager-serving-cert", + Optional: ptr.To(true), }, }, }) @@ -2091,7 +2092,6 @@ func (o HyperShiftReaderClusterRoleBinding) Build() *rbacv1.ClusterRoleBinding { type HyperShiftMutatingWebhookConfiguration struct { Namespace *corev1.Namespace EnableAuditLogPersistence bool - CABundle []byte } const ( @@ -2130,7 +2130,7 @@ func (o HyperShiftMutatingWebhookConfiguration) Build() *admissionregistrationv1 }, }, ClientConfig: admissionregistrationv1.WebhookClientConfig{ - CABundle: o.CABundle, + CABundle: nil, Service: &admissionregistrationv1.ServiceReference{ Namespace: "hypershift", Name: "operator", @@ -2157,7 +2157,7 @@ func (o HyperShiftMutatingWebhookConfiguration) Build() *admissionregistrationv1 }, }, ClientConfig: admissionregistrationv1.WebhookClientConfig{ - CABundle: o.CABundle, + CABundle: nil, Service: &admissionregistrationv1.ServiceReference{ Namespace: "hypershift", Name: "operator", @@ -2198,7 +2198,7 @@ func (o HyperShiftMutatingWebhookConfiguration) Build() *admissionregistrationv1 }, }, ClientConfig: admissionregistrationv1.WebhookClientConfig{ - CABundle: o.CABundle, + CABundle: nil, Service: &admissionregistrationv1.ServiceReference{ Namespace: "hypershift", Name: "operator", @@ -2228,7 +2228,7 @@ func (o HyperShiftMutatingWebhookConfiguration) Build() *admissionregistrationv1 }, }, ClientConfig: admissionregistrationv1.WebhookClientConfig{ - CABundle: o.CABundle, + CABundle: nil, Service: &admissionregistrationv1.ServiceReference{ Namespace: "hypershift", Name: "operator", @@ -2249,7 +2249,6 @@ func (o HyperShiftMutatingWebhookConfiguration) Build() *admissionregistrationv1 type HyperShiftValidatingWebhookConfiguration struct { Namespace string - CABundle []byte } func (o HyperShiftValidatingWebhookConfiguration) Build() *admissionregistrationv1.ValidatingWebhookConfiguration { @@ -2286,7 +2285,7 @@ func (o HyperShiftValidatingWebhookConfiguration) Build() *admissionregistration }, }, ClientConfig: admissionregistrationv1.WebhookClientConfig{ - CABundle: o.CABundle, + CABundle: nil, Service: &admissionregistrationv1.ServiceReference{ Namespace: "hypershift", Name: "operator", @@ -2315,7 +2314,7 @@ func (o HyperShiftValidatingWebhookConfiguration) Build() *admissionregistration }, }, ClientConfig: admissionregistrationv1.WebhookClientConfig{ - CABundle: o.CABundle, + CABundle: nil, Service: &admissionregistrationv1.ServiceReference{ Namespace: "hypershift", Name: "operator", diff --git a/cmd/install/install.go b/cmd/install/install.go index bbe9007d472..ee4a480f21b 100644 --- a/cmd/install/install.go +++ b/cmd/install/install.go @@ -30,7 +30,6 @@ import ( crdassets "github.com/openshift/hypershift/cmd/install/assets/crds" "github.com/openshift/hypershift/cmd/util" "github.com/openshift/hypershift/hypershift-operator/controllers/sharedingress" - "github.com/openshift/hypershift/hypershift-operator/controllers/webhookcerts" hyperapi "github.com/openshift/hypershift/support/api" "github.com/openshift/hypershift/support/config" "github.com/openshift/hypershift/support/metrics" @@ -717,22 +716,10 @@ func hyperShiftOperatorManifests(ctx context.Context, client crclient.Client, op operatorServiceAccount, rbacObjs := setupRBAC(opts, operatorNamespace) objects = append(objects, rbacObjs...) - // Generate self-managed webhook CA and serving cert when any webhook is enabled. - var webhookCABundle []byte - if opts.EnableDefaultingWebhook || opts.EnableConversionWebhook || opts.EnableValidatingWebhook || opts.EnableAuditLogPersistence { - caSecret, servingSecret, caBundle, err := webhookcerts.GenerateInitialWebhookCerts(operatorNamespace.Name, assets.HypershiftOperatorName) - if err != nil { - return nil, nil, fmt.Errorf("failed to generate webhook certs: %w", err) - } - objects = append(objects, caSecret, servingSecret) - webhookCABundle = caBundle - } - if opts.EnableDefaultingWebhook || opts.EnableAuditLogPersistence { mutatingWebhookConfiguration := assets.HyperShiftMutatingWebhookConfiguration{ Namespace: operatorNamespace, EnableAuditLogPersistence: opts.EnableAuditLogPersistence, - CABundle: webhookCABundle, }.Build() objects = append(objects, mutatingWebhookConfiguration) } @@ -740,7 +727,6 @@ func hyperShiftOperatorManifests(ctx context.Context, client crclient.Client, op if opts.EnableValidatingWebhook { validatingWebhookConfiguration := assets.HyperShiftValidatingWebhookConfiguration{ Namespace: operatorNamespace.Name, - CABundle: webhookCABundle, }.Build() objects = append(objects, validatingWebhookConfiguration) } @@ -792,7 +778,7 @@ func hyperShiftOperatorManifests(ctx context.Context, client crclient.Client, op objects = append(objects, sharedIngressObjs...) } - crds, err = setupCRDs(ctx, client, opts, operatorNamespace, operatorService, webhookCABundle) + crds, err = setupCRDs(ctx, client, opts, operatorNamespace, operatorService) if err != nil { return nil, nil, err } @@ -835,7 +821,7 @@ var ipamCRDNames = set.New( // related to etcd are excluded from the list. If the option EnableConversionWebhook is set to true, the CRDs related // to hypershift.openshift.io group are annotated with the necessary annotations to enable the conversion webhook. // If a client is provided, IPAM CRDs that already exist in the cluster are skipped to avoid conflicts. -func setupCRDs(ctx context.Context, client crclient.Client, opts Options, operatorNamespace *corev1.Namespace, operatorService *corev1.Service, webhookCABundle []byte) ([]crclient.Object, error) { +func setupCRDs(ctx context.Context, client crclient.Client, opts Options, operatorNamespace *corev1.Namespace, operatorService *corev1.Service) ([]crclient.Object, error) { // Build a set of existing IPAM CRDs if a client is available existingIPAMCRDs := set.New[string]() if client != nil { @@ -928,7 +914,7 @@ func setupCRDs(ctx context.Context, client crclient.Client, opts Options, operat Port: ptr.To[int32](443), Path: ptr.To("/convert"), }, - CABundle: webhookCABundle, + CABundle: nil, }, ConversionReviewVersions: []string{"v1beta1", "v1alpha1"}, }, diff --git a/cmd/install/install_test.go b/cmd/install/install_test.go index fb350f874c7..8bea4ff17d9 100644 --- a/cmd/install/install_test.go +++ b/cmd/install/install_test.go @@ -408,7 +408,7 @@ func TestSetupCRDs(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { g := NewGomegaWithT(t) - crds, err := setupCRDs(t.Context(), nil, tc.inputOptions, &corev1.Namespace{}, nil, nil) + crds, err := setupCRDs(t.Context(), nil, tc.inputOptions, &corev1.Namespace{}, nil) g.Expect(err).ToNot(HaveOccurred()) nodePoolCRDS := make([]crclient.Object, 0) var machineDeploymentCRD crclient.Object diff --git a/hypershift-operator/controllers/webhookcerts/webhookcerts_controller.go b/hypershift-operator/controllers/webhookcerts/webhookcerts_controller.go index b2d3c7652b4..79852656eec 100644 --- a/hypershift-operator/controllers/webhookcerts/webhookcerts_controller.go +++ b/hypershift-operator/controllers/webhookcerts/webhookcerts_controller.go @@ -33,6 +33,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/predicate" "github.com/go-logr/logr" @@ -364,3 +365,60 @@ func GenerateInitialWebhookCerts(namespace, serviceName string) (*corev1.Secret, caBundle := caSecret.Data[certs.CASignerCertMapKey] return caSecret, servingSecret, caBundle, nil } + +// EnsureWebhookCerts ensures that webhook cert secrets exist so the webhook +// server can start. If the serving cert secret already exists with valid data, +// this is a no-op (the volume mount handles file delivery). If the secret is +// missing or has empty data, new certs are generated and persisted as secrets. +func EnsureWebhookCerts(ctx context.Context, c client.Client, namespace, serviceName string) error { + log := ctrl.LoggerFrom(ctx).WithName("webhook-cert-bootstrap") + + if certsExist(ctx, c, namespace) { + log.Info("Webhook cert secrets already exist with valid data, skipping bootstrap") + return nil + } + + log.Info("Generating webhook certificates") + caSecret, servingSecret, _, err := GenerateInitialWebhookCerts(namespace, serviceName) + if err != nil { + return fmt.Errorf("failed to generate webhook certs: %w", err) + } + + caObj := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: CASecretName, Namespace: namespace}} + if _, err := controllerutil.CreateOrUpdate(ctx, c, caObj, func() error { + caObj.Type = corev1.SecretTypeOpaque + caObj.Data = caSecret.Data + return nil + }); err != nil { + return fmt.Errorf("failed to create or update CA secret: %w", err) + } + + servingObj := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: ServingCertSecretName, Namespace: namespace}} + if _, err := controllerutil.CreateOrUpdate(ctx, c, servingObj, func() error { + servingObj.Type = corev1.SecretTypeTLS + servingObj.Data = servingSecret.Data + return nil + }); err != nil { + return fmt.Errorf("failed to create or update serving cert secret: %w", err) + } + + log.Info("Webhook certificates bootstrapped") + return nil +} + +// certsExist returns true when both the CA and serving cert secrets exist with non-empty data. +func certsExist(ctx context.Context, c client.Client, namespace string) bool { + ca := &corev1.Secret{} + if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: CASecretName}, ca); err != nil { + return false + } + if len(ca.Data[certs.CASignerCertMapKey]) == 0 || len(ca.Data[certs.CASignerKeyMapKey]) == 0 { + return false + } + + serving := &corev1.Secret{} + if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: ServingCertSecretName}, serving); err != nil { + return false + } + return len(serving.Data[corev1.TLSCertKey]) > 0 && len(serving.Data[corev1.TLSPrivateKeyKey]) > 0 +} diff --git a/hypershift-operator/controllers/webhookcerts/webhookcerts_controller_test.go b/hypershift-operator/controllers/webhookcerts/webhookcerts_controller_test.go index ec96cf3dd5b..98bbe6a3d6a 100644 --- a/hypershift-operator/controllers/webhookcerts/webhookcerts_controller_test.go +++ b/hypershift-operator/controllers/webhookcerts/webhookcerts_controller_test.go @@ -2,6 +2,8 @@ package webhookcerts import ( "context" + "crypto/x509" + "encoding/pem" "testing" "time" @@ -410,6 +412,110 @@ func TestGenerateInitialWebhookCerts(t *testing.T) { }) } +func TestEnsureWebhookCerts(t *testing.T) { + t.Run("When no secrets exist it should create secrets", func(t *testing.T) { + g := NewWithT(t) + + cl := fake.NewClientBuilder().WithScheme(newScheme(t)).Build() + + err := EnsureWebhookCerts(t.Context(), cl, "hypershift", "operator") + g.Expect(err).ToNot(HaveOccurred()) + + caSecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: CASecretName, Namespace: "hypershift"}, caSecret)).To(Succeed()) + g.Expect(caSecret.Data).To(HaveKey(certs.CASignerCertMapKey)) + + servingSecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: ServingCertSecretName, Namespace: "hypershift"}, servingSecret)).To(Succeed()) + g.Expect(servingSecret.Data).To(HaveKey(corev1.TLSCertKey)) + g.Expect(servingSecret.Data).To(HaveKey(corev1.TLSPrivateKeyKey)) + g.Expect(servingSecret.Type).To(Equal(corev1.SecretTypeTLS)) + }) + + t.Run("When secrets already exist with valid data it should not modify them", func(t *testing.T) { + g := NewWithT(t) + + caSecret, servingSecret, _, err := GenerateInitialWebhookCerts("hypershift", "operator") + g.Expect(err).ToNot(HaveOccurred()) + + cl := fake.NewClientBuilder().WithScheme(newScheme(t)).WithObjects(caSecret, servingSecret).Build() + + err = EnsureWebhookCerts(t.Context(), cl, "hypershift", "operator") + g.Expect(err).ToNot(HaveOccurred()) + + updatedServingSecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: ServingCertSecretName, Namespace: "hypershift"}, updatedServingSecret)).To(Succeed()) + g.Expect(updatedServingSecret.Data[corev1.TLSCertKey]).To(Equal(servingSecret.Data[corev1.TLSCertKey])) + g.Expect(updatedServingSecret.Data[corev1.TLSPrivateKeyKey]).To(Equal(servingSecret.Data[corev1.TLSPrivateKeyKey])) + + updatedCASecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: CASecretName, Namespace: "hypershift"}, updatedCASecret)).To(Succeed()) + g.Expect(updatedCASecret.Data[certs.CASignerCertMapKey]).To(Equal(caSecret.Data[certs.CASignerCertMapKey])) + g.Expect(updatedCASecret.Data[certs.CASignerKeyMapKey]).To(Equal(caSecret.Data[certs.CASignerKeyMapKey])) + }) + + t.Run("When serving cert secret exists with empty data it should regenerate and update", func(t *testing.T) { + g := NewWithT(t) + + emptySecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: ServingCertSecretName, + Namespace: "hypershift", + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{}, + } + + cl := fake.NewClientBuilder().WithScheme(newScheme(t)).WithObjects(emptySecret).Build() + + err := EnsureWebhookCerts(t.Context(), cl, "hypershift", "operator") + g.Expect(err).ToNot(HaveOccurred()) + + updatedServingSecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: ServingCertSecretName, Namespace: "hypershift"}, updatedServingSecret)).To(Succeed()) + g.Expect(updatedServingSecret.Data[corev1.TLSCertKey]).ToNot(BeEmpty()) + g.Expect(updatedServingSecret.Data[corev1.TLSPrivateKeyKey]).ToNot(BeEmpty()) + + updatedCASecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: CASecretName, Namespace: "hypershift"}, updatedCASecret)).To(Succeed()) + g.Expect(updatedCASecret.Data[certs.CASignerCertMapKey]).ToNot(BeEmpty()) + g.Expect(updatedCASecret.Data[certs.CASignerKeyMapKey]).ToNot(BeEmpty()) + }) + + t.Run("When CA exists but serving cert is missing it should regenerate both secrets", func(t *testing.T) { + g := NewWithT(t) + + caSecret, _, _, err := GenerateInitialWebhookCerts("hypershift", "operator") + g.Expect(err).ToNot(HaveOccurred()) + + cl := fake.NewClientBuilder().WithScheme(newScheme(t)).WithObjects(caSecret).Build() + + err = EnsureWebhookCerts(t.Context(), cl, "hypershift", "operator") + g.Expect(err).ToNot(HaveOccurred()) + + // Both secrets should exist with valid data. + updatedCASecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: CASecretName, Namespace: "hypershift"}, updatedCASecret)).To(Succeed()) + g.Expect(updatedCASecret.Data[certs.CASignerCertMapKey]).ToNot(BeEmpty()) + g.Expect(updatedCASecret.Data[certs.CASignerKeyMapKey]).ToNot(BeEmpty()) + + servingSecret := &corev1.Secret{} + g.Expect(cl.Get(t.Context(), client.ObjectKey{Name: ServingCertSecretName, Namespace: "hypershift"}, servingSecret)).To(Succeed()) + g.Expect(servingSecret.Data[corev1.TLSCertKey]).ToNot(BeEmpty()) + g.Expect(servingSecret.Data[corev1.TLSPrivateKeyKey]).ToNot(BeEmpty()) + + // Verify the serving cert was signed by the (possibly replaced) CA. + caPool := x509.NewCertPool() + g.Expect(caPool.AppendCertsFromPEM(updatedCASecret.Data[certs.CASignerCertMapKey])).To(BeTrue()) + block, _ := pem.Decode(servingSecret.Data[corev1.TLSCertKey]) + g.Expect(block).ToNot(BeNil()) + leaf, err := x509.ParseCertificate(block.Bytes) + g.Expect(err).ToNot(HaveOccurred()) + _, err = leaf.Verify(x509.VerifyOptions{Roots: caPool}) + g.Expect(err).ToNot(HaveOccurred()) + }) +} + func TestWebhookDNSNames(t *testing.T) { t.Run("When given service name and namespace it should return correct DNS names", func(t *testing.T) { g := NewWithT(t) diff --git a/hypershift-operator/main.go b/hypershift-operator/main.go index 40461660934..e2ee22940d7 100644 --- a/hypershift-operator/main.go +++ b/hypershift-operator/main.go @@ -24,6 +24,7 @@ import ( hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" awsutil "github.com/openshift/hypershift/cmd/infra/aws/util" + "github.com/openshift/hypershift/cmd/install/assets" pkiconfig "github.com/openshift/hypershift/control-plane-pki-operator/config" etcdrecovery "github.com/openshift/hypershift/etcd-recovery" "github.com/openshift/hypershift/hypershift-operator/controllers/auditlogpersistence" @@ -375,6 +376,12 @@ func run(ctx context.Context, opts *StartOptions, log logr.Logger) error { return fmt.Errorf("failed to construct api reading client: %w", err) } + if opts.CertDir != "" { + if err := webhookcerts.EnsureWebhookCerts(ctx, apiReadingClient, opts.Namespace, assets.HypershiftOperatorName); err != nil { + return fmt.Errorf("failed to bootstrap webhook certs: %w", err) + } + } + // Reconcile deprecation ValidatingAdmissionPolicy if supported if err := reconcileDeprecationValidatingAdmissionPolicy(ctx, apiReadingClient, mgmtClusterCaps, log); err != nil { return fmt.Errorf("failed to reconcile deprecation ValidatingAdmissionPolicy: %w", err)