From f93ce72f8822751f578eaa5ae78714ae761bd557 Mon Sep 17 00:00:00 2001 From: Ryan Zhang Date: Tue, 20 Jan 2026 14:57:00 -0800 Subject: [PATCH 1/3] feat: skip update condition of clusterProfile (#409) do not update the status if it's identical Signed-off-by: Ryan Zhang --- .../clusterprofile/controller.go | 43 ++++++++++++------- .../controller_integration_test.go | 24 +++++++++++ 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/pkg/controllers/clusterinventory/clusterprofile/controller.go b/pkg/controllers/clusterinventory/clusterprofile/controller.go index 52c01a7ee..1aeea73f7 100644 --- a/pkg/controllers/clusterinventory/clusterprofile/controller.go +++ b/pkg/controllers/clusterinventory/clusterprofile/controller.go @@ -22,6 +22,7 @@ import ( "fmt" "time" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -128,30 +129,25 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu // Note that if the object already exists and its spec matches with the desired space, no // update op will be performed. createOrUpdateRes, err := controllerutil.CreateOrUpdate(ctx, r, cp, func() error { - if cp.CreationTimestamp.IsZero() { - // Only set the ClusterManager field if the object is being created; this field - // is immutable by definition. - cp.Spec = clusterinventory.ClusterProfileSpec{ - ClusterManager: clusterinventory.ClusterManager{ - Name: controller.ClusterManagerName, - }, + if !cp.CreationTimestamp.IsZero() { + // log an unexpected error if the cluster profile content is modified behind our back + if cp.Spec.DisplayName != mc.Name || cp.Labels == nil || cp.Labels[clusterinventory.LabelClusterManagerKey] != controller.ClusterManagerName { + klog.ErrorS(controller.NewUnexpectedBehaviorError(fmt.Errorf("found an unexpected cluster profile: spec = `%+v`, label = `%+v`", cp.Spec, cp.Labels)), + "Cluster profile is modified behind our back", "memberCluster", mcRef, "clusterProfile", klog.KObj(cp)) } } - // log an unexpected error if the cluster profile is under the management of a different platform. - if cp.Spec.ClusterManager.Name != controller.ClusterManagerName { - klog.ErrorS(controller.NewUnexpectedBehaviorError(fmt.Errorf("found another clustrer Manager: `%s`", cp.Spec.ClusterManager.Name)), - "Cluster profile is under the management of a different platform", "memberCluster", mcRef, "clusterProfile", klog.KObj(cp)) - return nil + // Set the spec. + cp.Spec = clusterinventory.ClusterProfileSpec{ + ClusterManager: clusterinventory.ClusterManager{ + Name: controller.ClusterManagerName, + }, + DisplayName: mc.Name, } - // Set the labels. if cp.Labels == nil { cp.Labels = make(map[string]string) } cp.Labels[clusterinventory.LabelClusterManagerKey] = controller.ClusterManagerName - - // Set the display name. - cp.Spec.DisplayName = mc.Name return nil }) if err != nil { @@ -160,9 +156,15 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } klog.V(2).InfoS("Cluster profile object is created or updated", "memberCluster", mcRef, "clusterProfile", klog.KObj(cp), "operation", createOrUpdateRes) + existingCP := cp.DeepCopy() // sync the cluster profile status/condition from the member cluster condition r.fillInClusterStatus(mc, cp) r.syncClusterProfileCondition(mc, cp) + // skip status update if nothing changed + if equality.Semantic.DeepEqual(existingCP.Status, cp.Status) { + klog.V(2).InfoS("No need to update Cluster profile status", "memberCluster", mcRef, "clusterProfile", klog.KObj(cp)) + return ctrl.Result{}, nil + } if err = r.Status().Update(ctx, cp); err != nil { klog.ErrorS(err, "Failed to update cluster profile status", "memberCluster", mcRef, "clusterProfile", klog.KObj(cp)) return ctrl.Result{}, err @@ -308,6 +310,15 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { NamespacedName: types.NamespacedName{Name: e.Object.GetName()}, }) }, + UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + if e.ObjectNew.GetGeneration() == e.ObjectOld.GetGeneration() { + return + } + klog.V(2).InfoS("Handling a clusterProfil spec update event", "clusterProfile", klog.KObj(e.ObjectOld)) + q.Add(reconcile.Request{ + NamespacedName: types.NamespacedName{Name: e.ObjectOld.GetName()}, + }) + }, }). Complete(r) } diff --git a/pkg/controllers/clusterinventory/clusterprofile/controller_integration_test.go b/pkg/controllers/clusterinventory/clusterprofile/controller_integration_test.go index 387a6d16f..1f90b415a 100644 --- a/pkg/controllers/clusterinventory/clusterprofile/controller_integration_test.go +++ b/pkg/controllers/clusterinventory/clusterprofile/controller_integration_test.go @@ -30,6 +30,7 @@ import ( clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1" "github.com/kubefleet-dev/kubefleet/pkg/utils" "github.com/kubefleet-dev/kubefleet/pkg/utils/condition" + "github.com/kubefleet-dev/kubefleet/pkg/utils/controller" ) const ( @@ -88,6 +89,12 @@ var _ = Describe("Test ClusterProfile Controller", func() { if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: clusterProfileNS, Name: testMCName}, &clusterProfile); err != nil { return false } + if clusterProfile.Spec.ClusterManager.Name != controller.ClusterManagerName { + return false + } + if clusterProfile.Spec.DisplayName != testMCName { + return false + } cond := meta.FindStatusCondition(clusterProfile.Status.Conditions, clusterinventory.ClusterConditionControlPlaneHealthy) return condition.IsConditionStatusTrue(cond, clusterProfile.Generation) }, eventuallyTimeout, interval).Should(BeTrue(), "clusterProfile is not created") @@ -106,6 +113,23 @@ var _ = Describe("Test ClusterProfile Controller", func() { }, eventuallyTimeout, interval).Should(Succeed(), "clusterProfile is not created") }) + It("Should update a clusterProfile when it is modified by the user", func() { + By("Check the clusterProfile is created") + Eventually(func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: clusterProfileNS, Name: testMCName}, &clusterProfile) + }, eventuallyTimeout, interval).Should(Succeed(), "clusterProfile is not created") + By("Modifying the ClusterProfile") + clusterProfile.Spec.DisplayName = "ModifiedMCName" + Expect(k8sClient.Update(ctx, &clusterProfile)).Should(Succeed(), "failed to modify clusterProfile") + By("Check the clusterProfile is updated back to original state") + Eventually(func() bool { + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: clusterProfileNS, Name: testMCName}, &clusterProfile); err != nil { + return false + } + return clusterProfile.Spec.DisplayName == testMCName + }, eventuallyTimeout, interval).Should(BeTrue(), "clusterProfile is not updated back to original state") + }) + It("Should delete the clusterProfile when the MemberCluster is deleted", func() { By("Check the clusterProfile is created") Eventually(func() error { From 08f28805dc39591a3ba51eaad237efdb35a960ed Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 22 Jan 2026 01:06:08 +1100 Subject: [PATCH 2/3] feat: add logs to the readiness check handler (#415) Added some logs Signed-off-by: michaelawyu --- pkg/utils/informer/readiness/readiness.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pkg/utils/informer/readiness/readiness.go b/pkg/utils/informer/readiness/readiness.go index 749be5592..f46c4eb4b 100644 --- a/pkg/utils/informer/readiness/readiness.go +++ b/pkg/utils/informer/readiness/readiness.go @@ -25,13 +25,19 @@ import ( "k8s.io/klog/v2" ) +// TO-DO (chenyu1): the readiness check below verifies if all the caches have been sync'd; +// as an informer is considered synced once it has performed its initial list, checking the +// sync status repeatedly (be default every 10 seconds) might not be very performant. We should +// find a better way to handle this situation. + // InformerReadinessChecker creates a readiness check function that verifies // all resource informer caches are synced before marking the pod as ready. // This prevents components from processing requests before the discovery cache is populated. func InformerReadinessChecker(resourceInformer informer.Manager) func(*http.Request) error { return func(_ *http.Request) error { if resourceInformer == nil { - return fmt.Errorf("resource informer not initialized") + klog.V(2).InfoS("Readiness check failed: resource informer is nil") + return fmt.Errorf("resource informer is nil") } // Require ALL informer caches to be synced before marking ready @@ -39,6 +45,7 @@ func InformerReadinessChecker(resourceInformer informer.Manager) func(*http.Requ if len(allResources) == 0 { // This can happen during startup when the ResourceInformer is created but the InformerPopulator // hasn't discovered and registered any resources yet via AddDynamicResources(). + klog.V(2).InfoS("Readiness check failed: no resources registered in resource informer yet") return fmt.Errorf("resource informer not ready: no resources registered") } @@ -51,6 +58,9 @@ func InformerReadinessChecker(resourceInformer informer.Manager) func(*http.Requ } if len(unsyncedResources) > 0 { + klog.V(2).InfoS("Readiness check failed: some resource informers are not synced yet", + "countOfUnsyncedInformers", len(unsyncedResources), + "countOfTotalInformers", len(allResources)) return fmt.Errorf("resource informer not ready: %d/%d informers not synced yet", len(unsyncedResources), len(allResources)) } From 01e1b48ac3fd197d2fa055cd2a3b9383dc5b340e Mon Sep 17 00:00:00 2001 From: Wei Weng Date: Wed, 21 Jan 2026 16:55:01 -0500 Subject: [PATCH 3/3] feat: enable HA hub agents by optionally depending on cert manager (#366) --- .github/workflows/ci.yml | 2 + charts/hub-agent/README.md | 81 ++- charts/hub-agent/templates/certificate.yaml | 63 +++ charts/hub-agent/templates/deployment.yaml | 21 + charts/hub-agent/values.yaml | 11 +- cmd/hubagent/main.go | 47 +- cmd/hubagent/options/options.go | 4 + cmd/hubagent/options/validation.go | 4 + cmd/hubagent/options/validation_test.go | 30 ++ pkg/webhook/webhook.go | 153 +++++- pkg/webhook/webhook_test.go | 524 +++++++++++++++++++- test/e2e/setup.sh | 21 +- test/utils/manager/manager.go | 35 ++ 13 files changed, 959 insertions(+), 37 deletions(-) create mode 100644 charts/hub-agent/templates/certificate.yaml create mode 100644 test/utils/manager/manager.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dbec004a2..df7ab6a51 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,6 +14,7 @@ on: env: GO_VERSION: '1.24.9' + CERT_MANAGER_VERSION: 'v1.16.2' jobs: detect-noop: @@ -125,6 +126,7 @@ jobs: PROPERTY_PROVIDER: 'azure' RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL: ${{ matrix.resource-snapshot-creation-minimum-interval }} RESOURCE_CHANGES_COLLECTION_DURATION: ${{ matrix.resource-changes-collection-duration }} + CERT_MANAGER_VERSION: ${{ env.CERT_MANAGER_VERSION }} - name: Collect logs if: always() diff --git a/charts/hub-agent/README.md b/charts/hub-agent/README.md index 3f44d29a4..5ab3221eb 100644 --- a/charts/hub-agent/README.md +++ b/charts/hub-agent/README.md @@ -2,11 +2,33 @@ ## Install Chart +### Default Installation (Self-Signed Certificates) + ```console # Helm install with fleet-system namespace already created helm install hub-agent ./charts/hub-agent/ ``` +### Installation with cert-manager + +When using cert-manager for certificate management, install cert-manager as a prerequisite first: + +```console +# Install cert-manager (omit --version to get latest, or specify a version like --version v1.16.2) +# Note: See CERT_MANAGER_VERSION in .github/workflows/ci.yml for the version tested in CI +helm repo add jetstack https://charts.jetstack.io +helm repo update +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --set crds.enabled=true + +# Then install hub-agent with cert-manager enabled +helm install hub-agent ./charts/hub-agent --set useCertManager=true --set enableWorkload=true --set enableWebhook=true +``` + +This configures cert-manager to manage webhook certificates. + ## Upgrade Chart ```console @@ -32,6 +54,12 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen | `affinity` | Node affinity for hub-agent pods | `{}` | | `tolerations` | Tolerations for hub-agent pods | `[]` | | `logVerbosity` | Log level (klog V logs) | `5` | +| `enableWebhook` | Enable webhook server | `true` | +| `webhookServiceName` | Webhook service name | `fleetwebhook` | +| `enableGuardRail` | Enable guard rail webhook configurations | `true` | +| `webhookClientConnectionType` | Connection type for webhook client (service or url) | `service` | +| `useCertManager` | Use cert-manager for webhook certificate management (requires `enableWorkload=true`) | `false` | +| `webhookCertSecretName` | Name of the Secret where cert-manager stores the certificate | `fleet-webhook-server-cert` | | `enableV1Beta1APIs` | Watch for v1beta1 APIs | `true` | | `hubAPIQPS` | QPS for fleet-apiserver (not including events/node heartbeat) | `250` | | `hubAPIBurst` | Burst for fleet-apiserver (not including events/node heartbeat) | `1000` | @@ -41,4 +69,55 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen | `MaxFleetSizeSupported` | Max number of member clusters supported | `100` | | `resourceSnapshotCreationMinimumInterval` | The minimum interval at which resource snapshots could be created. | `30s` | | `resourceChangesCollectionDuration` | The duration for collecting resource changes into one snapshot. | `15s` | -| `enableWorkload` | Enable kubernetes builtin workload to run in hub cluster. | `false` | \ No newline at end of file +| `enableWorkload` | Enable kubernetes builtin workload to run in hub cluster. | `false` | + +## Certificate Management + +The hub-agent supports two modes for webhook certificate management: + +### Automatic Certificate Generation (Default) + +By default, the hub-agent generates certificates automatically at startup. This mode: +- Requires no external dependencies +- Works out of the box +- Certificates are valid for 10 years +- **Limitation: Only supports single replica deployment** (replicaCount must be 1) + +### cert-manager (Optional) + +When `useCertManager=true`, certificates are managed by cert-manager. This mode: +- Requires cert-manager to be installed as a prerequisite +- Requires `enableWorkload=true` to allow cert-manager pods to run in the hub cluster (without this, pod creation would be blocked by the webhook) +- Requires `enableWebhook=true` because cert-manager is only used for webhook certificate management +- Handles certificate rotation automatically (90-day certificates) +- Follows industry-standard certificate management practices +- **Supports high availability with multiple replicas** (replicaCount > 1) +- Suitable for production environments + +To switch to cert-manager mode: +```console +# Install cert-manager first (omit --version to get latest, or specify a version like --version v1.16.2) +# Note: See CERT_MANAGER_VERSION in .github/workflows/ci.yml for the version tested in CI +helm repo add jetstack https://charts.jetstack.io +helm repo update +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --set crds.enabled=true + +# Then install hub-agent with cert-manager enabled +helm install hub-agent ./charts/hub-agent --set useCertManager=true --set enableWorkload=true --set enableWebhook=true +``` + +The `webhookCertSecretName` parameter specifies the Secret name for the certificate: +- Default: `fleet-webhook-server-cert` +- When using cert-manager, this is where cert-manager stores the certificate +- Must match the secret name referenced in the deployment volume mount + +Example with custom secret name: +```console +helm install hub-agent ./charts/hub-agent \ + --set useCertManager=true \ + --set enableWorkload=true \ + --set webhookCertSecretName=my-webhook-secret +``` \ No newline at end of file diff --git a/charts/hub-agent/templates/certificate.yaml b/charts/hub-agent/templates/certificate.yaml new file mode 100644 index 000000000..f96a2b7ce --- /dev/null +++ b/charts/hub-agent/templates/certificate.yaml @@ -0,0 +1,63 @@ +{{- if and .Values.enableWebhook .Values.useCertManager }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + # This name must match FleetWebhookCertName in pkg/webhook/webhook.go + name: fleet-webhook-certificate + namespace: {{ .Values.namespace }} + labels: + {{- include "hub-agent.labels" . | nindent 4 }} +spec: + # Secret name where cert-manager will store the certificate + secretName: {{ .Values.webhookCertSecretName }} + + # Certificate duration (90 days is cert-manager's default and recommended) + duration: 2160h # 90 days + + # Renew certificate 30 days before expiry + renewBefore: 720h # 30 days + + # Subject configuration + subject: + organizations: + - KubeFleet + + # Common name + commonName: fleet-webhook.{{ .Values.namespace }}.svc + + # DNS names for the certificate + dnsNames: + - {{ .Values.webhookServiceName }} + - {{ .Values.webhookServiceName }}.{{ .Values.namespace }} + - {{ .Values.webhookServiceName }}.{{ .Values.namespace }}.svc + - {{ .Values.webhookServiceName }}.{{ .Values.namespace }}.svc.cluster.local + + # Issuer reference - using self-signed issuer + issuerRef: + name: fleet-selfsigned-issuer + kind: Issuer + group: cert-manager.io + + # Private key configuration + privateKey: + algorithm: ECDSA + size: 256 + + # Key usages + usages: + - digital signature + - key encipherment + - server auth +--- +# Self-signed issuer for generating the certificate +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: fleet-selfsigned-issuer + namespace: {{ .Values.namespace }} + labels: + {{- include "hub-agent.labels" . | nindent 4 }} +spec: + selfSigned: {} +{{- end }} diff --git a/charts/hub-agent/templates/deployment.yaml b/charts/hub-agent/templates/deployment.yaml index 7ed6ab7ba..9d652be57 100644 --- a/charts/hub-agent/templates/deployment.yaml +++ b/charts/hub-agent/templates/deployment.yaml @@ -1,3 +1,6 @@ +{{- if and (not .Values.useCertManager) (gt (.Values.replicaCount | int) 1) }} +{{- fail "ERROR: replicaCount > 1 requires useCertManager=true (self-signed certificates cannot be shared across replicas)" }} +{{- end }} apiVersion: apps/v1 kind: Deployment metadata: @@ -6,6 +9,7 @@ metadata: labels: {{- include "hub-agent.labels" . | nindent 4 }} spec: + replicas: {{ .Values.replicaCount }} selector: matchLabels: {{- include "hub-agent.selectorLabels" . | nindent 6 }} @@ -25,6 +29,7 @@ spec: - --webhook-service-name={{ .Values.webhookServiceName }} - --enable-guard-rail={{ .Values.enableGuardRail }} - --enable-workload={{ .Values.enableWorkload }} + - --use-cert-manager={{ .Values.useCertManager }} - --whitelisted-users=system:serviceaccount:fleet-system:hub-agent-sa - --webhook-client-connection-type={{.Values.webhookClientConnectionType}} - --v={{ .Values.logVerbosity }} @@ -73,6 +78,22 @@ spec: fieldPath: metadata.namespace resources: {{- toYaml .Values.resources | nindent 12 }} + {{- if .Values.useCertManager }} + volumeMounts: + - name: webhook-cert + # This path must match FleetWebhookCertDir in pkg/webhook/webhook.go + mountPath: /tmp/k8s-webhook-server/serving-certs + readOnly: true + {{- end }} + {{- if .Values.useCertManager }} + volumes: + - name: webhook-cert + secret: + secretName: {{ .Values.webhookCertSecretName }} + # defaultMode 0444 (read for all) allows the container process to read the certs + # regardless of the user/group it runs as + defaultMode: 0444 + {{- end }} {{- with .Values.affinity }} affinity: {{- toYaml . | nindent 8 }} diff --git a/charts/hub-agent/values.yaml b/charts/hub-agent/values.yaml index a423c6270..ab1fac4db 100644 --- a/charts/hub-agent/values.yaml +++ b/charts/hub-agent/values.yaml @@ -17,13 +17,20 @@ webhookServiceName: fleetwebhook enableGuardRail: true webhookClientConnectionType: service enableWorkload: false +# useCertManager enables cert-manager for webhook certificate management +# When enabled, cert-manager must be installed as a prerequisite (it is not installed automatically by this chart) +# and a Certificate resource will be created +useCertManager: false +# webhookCertSecretName is ONLY used when useCertManager=true +# It specifies the name of the Secret where cert-manager stores the certificate +# webhookCertSecretName: fleet-webhook-server-cert + forceDeleteWaitTime: 15m0s clusterUnhealthyThreshold: 3m0s resourceSnapshotCreationMinimumInterval: 30s resourceChangesCollectionDuration: 15s -namespace: - fleet-system +namespace: fleet-system resources: limits: diff --git a/cmd/hubagent/main.go b/cmd/hubagent/main.go index d69d1d867..2e67bd2b7 100644 --- a/cmd/hubagent/main.go +++ b/cmd/hubagent/main.go @@ -20,8 +20,8 @@ import ( "flag" "fmt" "math" + "net/http" "os" - "strings" "sync" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -65,8 +65,7 @@ var ( ) const ( - FleetWebhookCertDir = "/tmp/k8s-webhook-server/serving-certs" - FleetWebhookPort = 9443 + FleetWebhookPort = 9443 ) func init() { @@ -121,7 +120,7 @@ func main() { }, WebhookServer: ctrlwebhook.NewServer(ctrlwebhook.Options{ Port: FleetWebhookPort, - CertDir: FleetWebhookCertDir, + CertDir: webhook.FleetWebhookCertDir, }), } if opts.EnablePprof { @@ -157,12 +156,31 @@ func main() { } if opts.EnableWebhook { - whiteListedUsers := strings.Split(opts.WhiteListedUsers, ",") - if err := SetupWebhook(mgr, options.WebhookClientConnectionType(opts.WebhookClientConnectionType), opts.WebhookServiceName, whiteListedUsers, - opts.EnableGuardRail, opts.EnableV1Beta1APIs, opts.DenyModifyMemberClusterLabels, opts.EnableWorkload, opts.NetworkingAgentsEnabled); err != nil { + // Generate webhook configuration with certificates + webhookConfig, err := webhook.NewWebhookConfigFromOptions(mgr, opts, FleetWebhookPort) + if err != nil { + klog.ErrorS(err, "unable to create webhook config") + exitWithErrorFunc() + } + + // Setup webhooks with the manager + if err := SetupWebhook(mgr, webhookConfig); err != nil { klog.ErrorS(err, "unable to set up webhook") exitWithErrorFunc() } + + // When using cert-manager, add a readiness check to ensure CA bundles are injected before marking ready. + // This prevents the pod from accepting traffic before cert-manager has populated the webhook CA bundles, + // which would cause webhook calls to fail. + if opts.UseCertManager { + if err := mgr.AddReadyzCheck("cert-manager-ca-injection", func(req *http.Request) error { + return webhookConfig.CheckCAInjection(req.Context()) + }); err != nil { + klog.ErrorS(err, "unable to set up cert-manager CA injection readiness check") + exitWithErrorFunc() + } + klog.V(2).InfoS("Added cert-manager CA injection readiness check") + } } ctx := ctrl.SetupSignalHandler() @@ -200,20 +218,13 @@ func main() { wg.Wait() } -// SetupWebhook generates the webhook cert and then set up the webhook configurator. -func SetupWebhook(mgr manager.Manager, webhookClientConnectionType options.WebhookClientConnectionType, webhookServiceName string, - whiteListedUsers []string, enableGuardRail, isFleetV1Beta1API bool, denyModifyMemberClusterLabels bool, enableWorkload bool, networkingAgentsEnabled bool) error { - // Generate self-signed key and crt files in FleetWebhookCertDir for the webhook server to start. - w, err := webhook.NewWebhookConfig(mgr, webhookServiceName, FleetWebhookPort, &webhookClientConnectionType, FleetWebhookCertDir, enableGuardRail, denyModifyMemberClusterLabels, enableWorkload) - if err != nil { - klog.ErrorS(err, "fail to generate WebhookConfig") - return err - } - if err = mgr.Add(w); err != nil { +// SetupWebhook registers the webhook config and webhook handlers with the manager. +func SetupWebhook(mgr manager.Manager, webhookConfig *webhook.Config) error { + if err := mgr.Add(webhookConfig); err != nil { klog.ErrorS(err, "unable to add WebhookConfig") return err } - if err = webhook.AddToManager(mgr, whiteListedUsers, denyModifyMemberClusterLabels, networkingAgentsEnabled); err != nil { + if err := webhook.AddToManager(mgr, webhookConfig); err != nil { klog.ErrorS(err, "unable to register webhooks to the manager") return err } diff --git a/cmd/hubagent/options/options.go b/cmd/hubagent/options/options.go index 6573789c1..7568ae7cc 100644 --- a/cmd/hubagent/options/options.go +++ b/cmd/hubagent/options/options.go @@ -110,6 +110,9 @@ type Options struct { // EnableWorkload enables workload resources (pods and replicasets) to be created in the hub cluster. // When set to true, the pod and replicaset validating webhooks are disabled. EnableWorkload bool + // UseCertManager indicates whether to use cert-manager for webhook certificate management. + // When enabled, webhook certificates are managed by cert-manager instead of self-signed generation. + UseCertManager bool // ResourceSnapshotCreationMinimumInterval is the minimum interval at which resource snapshots could be created. // Whether the resource snapshot is created or not depends on the both ResourceSnapshotCreationMinimumInterval and ResourceChangesCollectionDuration. ResourceSnapshotCreationMinimumInterval time.Duration @@ -185,6 +188,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) { flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.") flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.") flags.BoolVar(&o.EnableWorkload, "enable-workload", false, "If set, workloads (pods and replicasets) can be created in the hub cluster. This disables the pod and replicaset validating webhooks.") + flags.BoolVar(&o.UseCertManager, "use-cert-manager", false, "If set, cert-manager will be used for webhook certificate management instead of self-signed certificates.") flags.DurationVar(&o.ResourceSnapshotCreationMinimumInterval, "resource-snapshot-creation-minimum-interval", 30*time.Second, "The minimum interval at which resource snapshots could be created.") flags.DurationVar(&o.ResourceChangesCollectionDuration, "resource-changes-collection-duration", 15*time.Second, "The duration for collecting resource changes into one snapshot. The default is 15 seconds, which means that the controller will collect resource changes for 15 seconds before creating a resource snapshot.") diff --git a/cmd/hubagent/options/validation.go b/cmd/hubagent/options/validation.go index df5af530f..9e12ab049 100644 --- a/cmd/hubagent/options/validation.go +++ b/cmd/hubagent/options/validation.go @@ -52,6 +52,10 @@ func (o *Options) Validate() field.ErrorList { errs = append(errs, field.Invalid(newPath.Child("WebhookServiceName"), o.WebhookServiceName, "Webhook service name is required when webhook is enabled")) } + if o.UseCertManager && !o.EnableWorkload { + errs = append(errs, field.Invalid(newPath.Child("UseCertManager"), o.UseCertManager, "UseCertManager requires EnableWorkload to be true (when EnableWorkload is false, a validating webhook blocks pod creation except for certain system pods; cert-manager controller pods must be allowed to run in the hub cluster)")) + } + connectionType := o.WebhookClientConnectionType if _, err := parseWebhookClientConnectionString(connectionType); err != nil { errs = append(errs, field.Invalid(newPath.Child("WebhookClientConnectionType"), o.WebhookClientConnectionType, err.Error())) diff --git a/cmd/hubagent/options/validation_test.go b/cmd/hubagent/options/validation_test.go index b4cf80b9c..4711ff277 100644 --- a/cmd/hubagent/options/validation_test.go +++ b/cmd/hubagent/options/validation_test.go @@ -27,6 +27,10 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" ) +const ( + testWebhookServiceName = "test-webhook" +) + // a callback function to modify options type ModifyOptions func(option *Options) @@ -93,6 +97,32 @@ func TestValidateControllerManagerConfiguration(t *testing.T) { }), want: field.ErrorList{field.Invalid(newPath.Child("WebhookServiceName"), "", "Webhook service name is required when webhook is enabled")}, }, + "UseCertManager with EnableWebhook": { + opt: newTestOptions(func(option *Options) { + option.EnableWebhook = true + option.WebhookServiceName = testWebhookServiceName + option.UseCertManager = true + }), + want: field.ErrorList{field.Invalid(newPath.Child("UseCertManager"), true, "UseCertManager requires EnableWorkload to be true (when EnableWorkload is false, a validating webhook blocks pod creation except for certain system pods; cert-manager controller pods must be allowed to run in the hub cluster)")}, + }, + "UseCertManager without EnableWorkload": { + opt: newTestOptions(func(option *Options) { + option.EnableWebhook = true + option.WebhookServiceName = testWebhookServiceName + option.UseCertManager = true + option.EnableWorkload = false + }), + want: field.ErrorList{field.Invalid(newPath.Child("UseCertManager"), true, "UseCertManager requires EnableWorkload to be true (when EnableWorkload is false, a validating webhook blocks pod creation except for certain system pods; cert-manager controller pods must be allowed to run in the hub cluster)")}, + }, + "UseCertManager with EnableWebhook and EnableWorkload": { + opt: newTestOptions(func(option *Options) { + option.EnableWebhook = true + option.WebhookServiceName = testWebhookServiceName + option.UseCertManager = true + option.EnableWorkload = true + }), + want: field.ErrorList{}, + }, } for name, tc := range testCases { diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index 4f4c89b56..5f518d692 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -29,6 +29,7 @@ import ( "math/big" "os" "path/filepath" + "strings" "time" admv1 "k8s.io/api/admissionregistration/v1" @@ -74,6 +75,13 @@ const ( fleetGuardRailWebhookCfgName = "fleet-guard-rail-webhook-configuration" fleetMutatingWebhookCfgName = "fleet-mutating-webhook-configuration" + // FleetWebhookCertDir is the directory where webhook certificates are stored. + // This path must match the volumeMount path in charts/hub-agent/templates/deployment.yaml + FleetWebhookCertDir = "/tmp/k8s-webhook-server/serving-certs" + // FleetWebhookCertName is the name of the Certificate resource created by cert-manager. + // This name must match the Certificate name in charts/hub-agent/templates/certificate.yaml + FleetWebhookCertName = "fleet-webhook-certificate" + crdResourceName = "customresourcedefinitions" bindingResourceName = "bindings" configMapResourceName = "configmaps" @@ -134,14 +142,14 @@ var AddToManagerFleetResourceValidator func(manager.Manager, []string, bool) err var AddToManagerMemberclusterValidator func(manager.Manager, bool) // AddToManager adds all Controllers to the Manager -func AddToManager(m manager.Manager, whiteListedUsers []string, denyModifyMemberClusterLabels bool, networkingAgentsEnabled bool) error { +func AddToManager(m manager.Manager, config *Config) error { for _, f := range AddToManagerFuncs { if err := f(m); err != nil { return err } } - AddToManagerMemberclusterValidator(m, networkingAgentsEnabled) - return AddToManagerFleetResourceValidator(m, whiteListedUsers, denyModifyMemberClusterLabels) + AddToManagerMemberclusterValidator(m, config.networkingAgentsEnabled) + return AddToManagerFleetResourceValidator(m, config.whiteListedUsers, config.denyModifyMemberClusterLabels) } type Config struct { @@ -162,9 +170,18 @@ type Config struct { denyModifyMemberClusterLabels bool enableWorkload bool + // useCertManager indicates whether cert-manager is used for certificate management + useCertManager bool + // webhookCertName is the name of the Certificate resource created by cert-manager. + // This is referenced in the cert-manager.io/inject-ca-from annotation. + webhookCertName string + // whiteListedUsers is a list of users who are allowed to modify fleet resources + whiteListedUsers []string + // networkingAgentsEnabled indicates whether networking agents are enabled + networkingAgentsEnabled bool } -func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32, clientConnectionType *options.WebhookClientConnectionType, certDir string, enableGuardRail bool, denyModifyMemberClusterLabels bool, enableWorkload bool) (*Config, error) { +func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32, clientConnectionType *options.WebhookClientConnectionType, certDir string, enableGuardRail bool, denyModifyMemberClusterLabels bool, enableWorkload bool, useCertManager bool, webhookCertName string, whiteListedUsers []string, networkingAgentsEnabled bool) (*Config, error) { // We assume the Pod namespace should be passed to env through downward API in the Pod spec. namespace := os.Getenv("POD_NAMESPACE") if namespace == "" { @@ -180,13 +197,42 @@ func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32 enableGuardRail: enableGuardRail, denyModifyMemberClusterLabels: denyModifyMemberClusterLabels, enableWorkload: enableWorkload, + useCertManager: useCertManager, + webhookCertName: webhookCertName, + whiteListedUsers: whiteListedUsers, + networkingAgentsEnabled: networkingAgentsEnabled, + } + + if useCertManager { + // When using cert-manager, the CA bundle is automatically injected by cert-manager's CA injector + // based on the cert-manager.io/inject-ca-from annotation. We don't need to load or set the CA here. + // The certificates (tls.crt and tls.key) are mounted by Kubernetes and used automatically by the webhook server. + klog.V(2).InfoS("Using cert-manager for certificate management", "certDir", certDir) + } else { + // Use self-signed certificate generation (original flow) + caPEM, err := w.genCertificate(certDir) + if err != nil { + return nil, fmt.Errorf("failed to generate self-signed certificate: %w", err) + } + w.caPEM = caPEM } - caPEM, err := w.genCertificate(certDir) - if err != nil { - return nil, err - } - w.caPEM = caPEM - return &w, err + + return &w, nil +} + +// NewWebhookConfigFromOptions creates a webhook config from command-line options. +// This helper handles type conversions from option strings to proper types. +// Note: This function assumes opts has been pre-validated using opts.Validate(). +// String-to-enum conversions (e.g., WebhookClientConnectionType) are performed without +// additional validation, as validation happens at the Options level. +func NewWebhookConfigFromOptions(mgr manager.Manager, opts *options.Options, webhookPort int32) (*Config, error) { + webhookClientConnectionType := options.WebhookClientConnectionType(opts.WebhookClientConnectionType) + whiteListedUsers := strings.Split(opts.WhiteListedUsers, ",") + + return NewWebhookConfig(mgr, opts.WebhookServiceName, webhookPort, + &webhookClientConnectionType, FleetWebhookCertDir, opts.EnableGuardRail, + opts.DenyModifyMemberClusterLabels, opts.EnableWorkload, opts.UseCertManager, + FleetWebhookCertName, whiteListedUsers, opts.NetworkingAgentsEnabled) } func (w *Config) Start(ctx context.Context) error { @@ -198,6 +244,66 @@ func (w *Config) Start(ctx context.Context) error { return nil } +// CheckCAInjection verifies that cert-manager has injected the CA bundle into all webhook configurations. +// This is used as a readiness check when useCertManager is enabled. +// Returns nil when CA bundles are injected, or an error if they are missing. +func (w *Config) CheckCAInjection(ctx context.Context) error { + if !w.useCertManager { + // Not using cert-manager, no need to check + return nil + } + + cl := w.mgr.GetClient() + + // Check mutating webhook configuration + if err := w.checkMutatingWebhookCABundle(ctx, cl, fleetMutatingWebhookCfgName); err != nil { + return err + } + + // Check validating webhook configuration + if err := w.checkValidatingWebhookCABundle(ctx, cl, fleetValidatingWebhookCfgName); err != nil { + return err + } + + // Check guard rail webhook configuration if enabled + if w.enableGuardRail { + if err := w.checkValidatingWebhookCABundle(ctx, cl, fleetGuardRailWebhookCfgName); err != nil { + return err + } + } + + klog.V(2).InfoS("All webhook configurations have CA bundles injected by cert-manager") + return nil +} + +// checkMutatingWebhookCABundle verifies that all webhooks in a MutatingWebhookConfiguration have CA bundles. +func (w *Config) checkMutatingWebhookCABundle(ctx context.Context, cl client.Client, configName string) error { + var cfg admv1.MutatingWebhookConfiguration + if err := cl.Get(ctx, client.ObjectKey{Name: configName}, &cfg); err != nil { + return fmt.Errorf("failed to get MutatingWebhookConfiguration %s: %w", configName, err) + } + for _, webhook := range cfg.Webhooks { + if len(webhook.ClientConfig.CABundle) == 0 { + return fmt.Errorf("MutatingWebhookConfiguration %s webhook %s is missing CA bundle (cert-manager injection pending)", configName, webhook.Name) + } + } + return nil +} + +// checkValidatingWebhookCABundle verifies that all webhooks in a ValidatingWebhookConfiguration have CA bundles. +func (w *Config) checkValidatingWebhookCABundle(ctx context.Context, cl client.Client, configName string) error { + var cfg admv1.ValidatingWebhookConfiguration + if err := cl.Get(ctx, client.ObjectKey{Name: configName}, &cfg); err != nil { + return fmt.Errorf("failed to get ValidatingWebhookConfiguration %s: %w", configName, err) + } + for _, webhook := range cfg.Webhooks { + if len(webhook.ClientConfig.CABundle) == 0 { + return fmt.Errorf("ValidatingWebhookConfiguration %s webhook %s is missing CA bundle (cert-manager injection pending)", configName, webhook.Name) + } + } + return nil +} + // createFleetWebhookConfiguration creates the ValidatingWebhookConfiguration object for the webhook. func (w *Config) createFleetWebhookConfiguration(ctx context.Context) error { if err := w.createMutatingWebhookConfiguration(ctx, w.buildFleetMutatingWebhooks(), fleetMutatingWebhookCfgName); err != nil { @@ -214,14 +320,29 @@ func (w *Config) createFleetWebhookConfiguration(ctx context.Context) error { return nil } +// buildWebhookAnnotations creates annotations for webhook configurations. +// When using cert-manager, adds the inject-ca-from annotation to automatically inject the CA bundle. +func (w *Config) buildWebhookAnnotations() map[string]string { + annotations := map[string]string{} + if w.useCertManager { + // Tell cert-manager's CA injector to automatically inject the CA bundle from the Certificate resource. + // Format: "namespace/certificate-name" - references the Certificate resource, not the Secret. + annotations["cert-manager.io/inject-ca-from"] = fmt.Sprintf("%s/%s", w.serviceNamespace, w.webhookCertName) + } + return annotations +} + // createMutatingWebhookConfiguration creates the MutatingWebhookConfiguration object for the webhook. func (w *Config) createMutatingWebhookConfiguration(ctx context.Context, webhooks []admv1.MutatingWebhook, configName string) error { + annotations := w.buildWebhookAnnotations() + mutatingWebhookConfig := admv1.MutatingWebhookConfiguration{ ObjectMeta: metav1.ObjectMeta{ Name: configName, Labels: map[string]string{ "admissions.enforcer/disabled": "true", }, + Annotations: annotations, }, Webhooks: webhooks, } @@ -269,12 +390,15 @@ func (w *Config) buildFleetMutatingWebhooks() []admv1.MutatingWebhook { } func (w *Config) createValidatingWebhookConfiguration(ctx context.Context, webhooks []admv1.ValidatingWebhook, configName string) error { + annotations := w.buildWebhookAnnotations() + validatingWebhookConfig := admv1.ValidatingWebhookConfiguration{ ObjectMeta: metav1.ObjectMeta{ Name: configName, Labels: map[string]string{ "admissions.enforcer/disabled": "true", }, + Annotations: annotations, }, Webhooks: webhooks, } @@ -631,9 +755,14 @@ func (w *Config) createClientConfig(validationPath string) admv1.WebhookClientCo } serviceEndpoint := w.serviceURL + validationPath serviceRef.Path = ptr.To(validationPath) - config := admv1.WebhookClientConfig{ - CABundle: w.caPEM, + config := admv1.WebhookClientConfig{} + + // When using cert-manager, leave CABundle empty so cert-manager's CA injector can populate it. + // The cert-manager.io/inject-ca-from annotation triggers automatic CA injection. + if !w.useCertManager { + config.CABundle = w.caPEM } + switch *w.clientConnectionType { case options.Service: config.Service = &serviceRef diff --git a/pkg/webhook/webhook_test.go b/pkg/webhook/webhook_test.go index f1975b30b..1456fd74f 100644 --- a/pkg/webhook/webhook_test.go +++ b/pkg/webhook/webhook_test.go @@ -1,15 +1,23 @@ package webhook import ( + "context" + "strings" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" + admv1 "k8s.io/api/admissionregistration/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/manager" "github.com/kubefleet-dev/kubefleet/cmd/hubagent/options" "github.com/kubefleet-dev/kubefleet/pkg/utils" + testmanager "github.com/kubefleet-dev/kubefleet/test/utils/manager" ) func TestBuildFleetMutatingWebhooks(t *testing.T) { @@ -110,6 +118,7 @@ func TestNewWebhookConfig(t *testing.T) { enableGuardRail bool denyModifyMemberClusterLabels bool enableWorkload bool + useCertManager bool want *Config wantErr bool }{ @@ -119,10 +128,11 @@ func TestNewWebhookConfig(t *testing.T) { webhookServiceName: "test-webhook", port: 8080, clientConnectionType: nil, - certDir: "/tmp/cert", + certDir: t.TempDir(), enableGuardRail: true, denyModifyMemberClusterLabels: true, enableWorkload: false, + useCertManager: false, want: &Config{ serviceNamespace: "test-namespace", serviceName: "test-webhook", @@ -131,6 +141,30 @@ func TestNewWebhookConfig(t *testing.T) { enableGuardRail: true, denyModifyMemberClusterLabels: true, enableWorkload: false, + useCertManager: false, + }, + wantErr: false, + }, + { + name: "valid input with cert-manager", + mgr: nil, + webhookServiceName: "test-webhook", + port: 8080, + clientConnectionType: nil, + certDir: t.TempDir(), + enableGuardRail: true, + denyModifyMemberClusterLabels: true, + enableWorkload: true, + useCertManager: true, + want: &Config{ + serviceNamespace: "test-namespace", + serviceName: "test-webhook", + servicePort: 8080, + clientConnectionType: nil, + enableGuardRail: true, + denyModifyMemberClusterLabels: true, + enableWorkload: true, + useCertManager: true, }, wantErr: false, }, @@ -138,8 +172,8 @@ func TestNewWebhookConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Setenv("POD_NAMESPACE", "test-namespace") - defer t.Setenv("POD_NAMESPACE", "") - got, err := NewWebhookConfig(tt.mgr, tt.webhookServiceName, tt.port, tt.clientConnectionType, tt.certDir, tt.enableGuardRail, tt.denyModifyMemberClusterLabels, tt.enableWorkload) + + got, err := NewWebhookConfig(tt.mgr, tt.webhookServiceName, tt.port, tt.clientConnectionType, tt.certDir, tt.enableGuardRail, tt.denyModifyMemberClusterLabels, tt.enableWorkload, tt.useCertManager, "fleet-webhook-server-cert", nil, false) if (err != nil) != tt.wantErr { t.Errorf("NewWebhookConfig() error = %v, wantErr %v", err, tt.wantErr) return @@ -161,3 +195,487 @@ func TestNewWebhookConfig(t *testing.T) { }) } } + +func TestNewWebhookConfig_SelfSignedCertError(t *testing.T) { + t.Setenv("POD_NAMESPACE", "test-namespace") + + // Use an invalid certDir (read-only location) to force genCertificate to fail + invalidCertDir := "/proc/invalid-cert-dir" + + clientConnectionType := options.Service + _, err := NewWebhookConfig( + nil, + "test-service", + 443, + &clientConnectionType, + invalidCertDir, + false, // enableGuardRail + false, // denyModifyMemberClusterLabels + false, // enableWorkload + false, // useCertManager = false to trigger self-signed path + "fleet-webhook-server-cert", // webhookCertName + nil, // whiteListedUsers + false, // networkingAgentsEnabled + ) + + if err == nil { + t.Fatal("Expected error when genCertificate fails, got nil") + } + + expectedErrMsg := "failed to generate self-signed certificate" + if !strings.Contains(err.Error(), expectedErrMsg) { + t.Errorf("Expected error to contain '%s', got: %v", expectedErrMsg, err) + } +} + +func TestNewWebhookConfigFromOptions(t *testing.T) { + t.Setenv("POD_NAMESPACE", "test-namespace") + + testCases := map[string]struct { + opts *options.Options + wantErr bool + wantConfig *Config + }{ + "valid options with cert-manager": { + opts: &options.Options{ + WebhookServiceName: "test-webhook", + WebhookClientConnectionType: "service", + EnableGuardRail: true, + DenyModifyMemberClusterLabels: true, + EnableWorkload: true, + UseCertManager: true, + WhiteListedUsers: "user1,user2,user3", + NetworkingAgentsEnabled: true, + }, + wantErr: false, + wantConfig: &Config{ + serviceNamespace: "test-namespace", + serviceName: "test-webhook", + servicePort: 8443, + enableGuardRail: true, + denyModifyMemberClusterLabels: true, + enableWorkload: true, + useCertManager: true, + webhookCertName: "fleet-webhook-certificate", + whiteListedUsers: []string{"user1", "user2", "user3"}, + networkingAgentsEnabled: true, + }, + }, + "valid options without cert-manager": { + opts: &options.Options{ + WebhookServiceName: "test-webhook", + WebhookClientConnectionType: "url", + EnableGuardRail: false, + DenyModifyMemberClusterLabels: false, + EnableWorkload: false, + UseCertManager: false, + WhiteListedUsers: "admin", + NetworkingAgentsEnabled: false, + }, + wantErr: false, + wantConfig: &Config{ + serviceNamespace: "test-namespace", + serviceName: "test-webhook", + servicePort: 8443, + enableGuardRail: false, + denyModifyMemberClusterLabels: false, + enableWorkload: false, + useCertManager: false, + webhookCertName: "fleet-webhook-certificate", + whiteListedUsers: []string{"admin"}, + networkingAgentsEnabled: false, + }, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got, err := NewWebhookConfigFromOptions(nil, tc.opts, 8443) + if (err != nil) != tc.wantErr { + t.Errorf("NewWebhookConfigFromOptions() error = %v, wantErr %v", err, tc.wantErr) + return + } + + if tc.wantErr { + return + } + + // Verify key fields are set correctly + opts := []cmp.Option{ + cmpopts.IgnoreFields(Config{}, "caPEM", "mgr", "clientConnectionType", "serviceURL"), + } + opts = append(opts, cmpopts.IgnoreUnexported(Config{})) + if diff := cmp.Diff(tc.wantConfig, got, opts...); diff != "" { + t.Errorf("NewWebhookConfigFromOptions() mismatch (-want +got):\n%s", diff) + } + + // Verify whiteListedUsers were split correctly + if diff := cmp.Diff(tc.wantConfig.whiteListedUsers, got.whiteListedUsers); diff != "" { + t.Errorf("whiteListedUsers mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestBuildWebhookAnnotations(t *testing.T) { + testCases := map[string]struct { + config Config + expectedAnnotions map[string]string + }{ + "useCertManager is false - returns empty map": { + config: Config{ + useCertManager: false, + serviceNamespace: "fleet-system", + webhookCertName: "fleet-webhook-server-cert", + }, + expectedAnnotions: map[string]string{}, + }, + "useCertManager is true - returns annotation with correct format": { + config: Config{ + useCertManager: true, + serviceNamespace: "fleet-system", + webhookCertName: "fleet-webhook-server-cert", + }, + expectedAnnotions: map[string]string{ + "cert-manager.io/inject-ca-from": "fleet-system/fleet-webhook-server-cert", + }, + }, + "useCertManager is true with custom namespace and cert name": { + config: Config{ + useCertManager: true, + serviceNamespace: "custom-namespace", + webhookCertName: "custom-webhook-cert", + }, + expectedAnnotions: map[string]string{ + "cert-manager.io/inject-ca-from": "custom-namespace/custom-webhook-cert", + }, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got := tc.config.buildWebhookAnnotations() + if diff := cmp.Diff(tc.expectedAnnotions, got); diff != "" { + t.Errorf("buildWebhookAnnotations() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestCreateClientConfig(t *testing.T) { + serviceConnectionType := options.Service + testCases := map[string]struct { + config Config + validationPath string + expectCABundle bool + }{ + "useCertManager is false - CABundle should be set": { + config: Config{ + useCertManager: false, + caPEM: []byte("test-ca-bundle"), + serviceNamespace: "fleet-system", + serviceName: "fleet-webhook-service", + servicePort: 443, + clientConnectionType: &serviceConnectionType, + }, + validationPath: "/validate", + expectCABundle: true, + }, + "useCertManager is true - CABundle should be empty for cert-manager injection": { + config: Config{ + useCertManager: true, + caPEM: []byte("test-ca-bundle"), + serviceNamespace: "fleet-system", + serviceName: "fleet-webhook-service", + servicePort: 443, + clientConnectionType: &serviceConnectionType, + }, + validationPath: "/validate", + expectCABundle: false, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + clientConfig := tc.config.createClientConfig(tc.validationPath) + + if tc.expectCABundle { + if len(clientConfig.CABundle) == 0 { + t.Errorf("Expected CABundle to be set, but it was empty") + } + if diff := cmp.Diff(tc.config.caPEM, clientConfig.CABundle); diff != "" { + t.Errorf("CABundle mismatch (-want +got):\n%s", diff) + } + } else { + if len(clientConfig.CABundle) != 0 { + t.Errorf("Expected CABundle to be empty for cert-manager injection, but got: %v", clientConfig.CABundle) + } + } + + // Verify service reference is set correctly + if clientConfig.Service == nil { + t.Errorf("Expected Service to be set") + } else { + if clientConfig.Service.Namespace != tc.config.serviceNamespace { + t.Errorf("Expected Service.Namespace=%s, got %s", tc.config.serviceNamespace, clientConfig.Service.Namespace) + } + if clientConfig.Service.Name != tc.config.serviceName { + t.Errorf("Expected Service.Name=%s, got %s", tc.config.serviceName, clientConfig.Service.Name) + } + if *clientConfig.Service.Port != tc.config.servicePort { + t.Errorf("Expected Service.Port=%d, got %d", tc.config.servicePort, *clientConfig.Service.Port) + } + if *clientConfig.Service.Path != tc.validationPath { + t.Errorf("Expected Service.Path=%s, got %s", tc.validationPath, *clientConfig.Service.Path) + } + } + }) + } +} + +func TestCheckCAInjection(t *testing.T) { + testCases := map[string]struct { + config Config + existingObjects []client.Object + expectError bool + expectedErrorMsg string + }{ + "useCertManager is false - returns nil without checking": { + config: Config{ + useCertManager: false, + enableGuardRail: false, + }, + expectError: false, + }, + "useCertManager is true, all CA bundles present": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook-1", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + { + Name: "test-validating-webhook-2", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + }, + expectError: false, + }, + "useCertManager is true, mutating webhook missing CA bundle": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: nil, // Missing CA bundle + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + }, + expectError: true, + expectedErrorMsg: "test-mutating-webhook is missing CA bundle", + }, + "useCertManager is true, validating webhook missing CA bundle": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook-1", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + { + Name: "test-validating-webhook-2", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte{}, // Empty CA bundle + }, + }, + }, + }, + }, + expectError: true, + expectedErrorMsg: "test-validating-webhook-2 is missing CA bundle", + }, + "useCertManager is true with guard rail, all CA bundles present": { + config: Config{ + useCertManager: true, + enableGuardRail: true, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetGuardRailWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-guard-rail-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + }, + expectError: false, + }, + "useCertManager is true with guard rail, guard rail webhook missing CA bundle": { + config: Config{ + useCertManager: true, + enableGuardRail: true, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetGuardRailWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-guard-rail-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: nil, // Missing CA bundle + }, + }, + }, + }, + }, + expectError: true, + expectedErrorMsg: "test-guard-rail-webhook is missing CA bundle", + }, + "useCertManager is true, webhook configuration not found": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{}, + expectError: true, + expectedErrorMsg: "failed to get MutatingWebhookConfiguration", + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + // Create a fake client with a proper scheme + scheme := runtime.NewScheme() + _ = admv1.AddToScheme(scheme) + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tc.existingObjects...). + Build() + + // Create a fake manager that returns our fake client + tc.config.mgr = &testmanager.FakeManager{Client: fakeClient} + + err := tc.config.CheckCAInjection(context.Background()) + + if tc.expectError { + if err == nil { + t.Errorf("Expected error but got nil") + } else if !strings.Contains(err.Error(), tc.expectedErrorMsg) { + t.Errorf("Expected error message to contain %q, but got: %v", tc.expectedErrorMsg, err) + } + } else { + if err != nil { + t.Errorf("Expected no error but got: %v", err) + } + } + }) + } +} diff --git a/test/e2e/setup.sh b/test/e2e/setup.sh index d4b6b8a2a..802584007 100755 --- a/test/e2e/setup.sh +++ b/test/e2e/setup.sh @@ -28,6 +28,7 @@ export PROPERTY_PROVIDER="${PROPERTY_PROVIDER:-azure}" export USE_PREDEFINED_REGIONS="${USE_PREDEFINED_REGIONS:-false}" export RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL="${RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL:-0m}" export RESOURCE_CHANGES_COLLECTION_DURATION="${RESOURCE_CHANGES_COLLECTION_DURATION:-0m}" +export CERT_MANAGER_VERSION="${CERT_MANAGER_VERSION:-v1.16.2}" # The pre-defined regions; if the AKS property provider is used. # @@ -114,14 +115,32 @@ done # Install the helm charts -# Install the hub agent to the hub cluster kind export kubeconfig --name $HUB_CLUSTER + +# Install cert-manager first (required for webhook certificates) +echo "Installing cert-manager..." + +# Install cert-manager using Helm to avoid ownership conflicts with hub-agent chart +helm repo add jetstack https://charts.jetstack.io --force-update +helm repo update +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --version $CERT_MANAGER_VERSION \ + --set crds.enabled=true \ + --wait \ + --timeout=300s + +# Install the hub agent to the hub cluster helm install hub-agent ../../charts/hub-agent/ \ --set image.pullPolicy=Never \ --set image.repository=$REGISTRY/$HUB_AGENT_IMAGE \ --set image.tag=$TAG \ --set namespace=fleet-system \ --set logVerbosity=5 \ + --set replicaCount=3 \ + --set useCertManager=true \ + --set webhookCertSecretName=fleet-webhook-server-cert \ --set enableWebhook=true \ --set enableWorkload=true \ --set webhookClientConnectionType=service \ diff --git a/test/utils/manager/manager.go b/test/utils/manager/manager.go new file mode 100644 index 000000000..adceb6f37 --- /dev/null +++ b/test/utils/manager/manager.go @@ -0,0 +1,35 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package manager provides a fake controller-runtime manager for testing. +package manager + +import ( + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// FakeManager is a fake controller-runtime manager for testing. +type FakeManager struct { + manager.Manager + // Client is the Kubernetes client used by this manager. + Client client.Client +} + +// GetClient returns the configured client. +func (f *FakeManager) GetClient() client.Client { + return f.Client +}