From bb654c4a1e2751903999a3152f15c616c44cc6d6 Mon Sep 17 00:00:00 2001
From: Bryan Cox
Date: Mon, 20 Apr 2026 08:59:05 -0400
Subject: [PATCH] fix(azure): change workload identity webhook FailurePolicy from Fail to Ignore

During hosted cluster bootstrap on Azure, there is a race condition
between the MutatingWebhookConfiguration being registered and the
webhook sidecar being ready to serve. The webhook sidecar waits for KAS
to be available before starting, but the HCCO registers the webhook
configuration as soon as it can talk to guest KAS. With FailurePolicy:
Fail, any pod creation matching the azure.workload.identity/use label
during this window is rejected, causing components like oauth-apiserver
and router to restart.

This matches the pattern used by the AWS pod identity webhook, which
already uses FailurePolicy: Ignore. Pods that miss the mutation will be
re-created by their controllers once the webhook is ready.

Signed-off-by: Bryan Cox
Co-Authored-By: Claude Opus 4.6
---
 .../resources/azure_workload_identity_webhook_test.go | 2 +-
 .../controllers/resources/resources.go | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/azure_workload_identity_webhook_test.go b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/azure_workload_identity_webhook_test.go
index b7d4e4b23dc..d0f9e58263c 100644
--- a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/azure_workload_identity_webhook_test.go
+++ b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/azure_workload_identity_webhook_test.go
@@ -71,7 +71,7 @@ func TestReconcileAzureIdentityWebhook(t *testing.T) {
 	wh := webhook.Webhooks[0]
 	g.Expect(wh.Name).To(Equal("pod-identity-webhook.azure.mutate.io"))
 	g.Expect(wh.AdmissionReviewVersions).To(Equal([]string{"v1", "v1beta1"}))
-	g.Expect(*wh.FailurePolicy).To(Equal(admissionregistrationv1.Fail))
+	g.Expect(*wh.FailurePolicy).To(Equal(admissionregistrationv1.Ignore))
 	g.Expect(*wh.MatchPolicy).To(Equal(admissionregistrationv1.Equivalent))
 	g.Expect(*wh.ReinvocationPolicy).To(Equal(admissionregistrationv1.IfNeededReinvocationPolicy))
 	g.Expect(*wh.SideEffects).To(Equal(admissionregistrationv1.SideEffectClassNone))
diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go
index c6c8334b5e4..fdc1bc2f04e 100644
--- a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go
+++ b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go
@@ -2594,7 +2594,7 @@ func (r *reconciler) reconcileAzureIdentityWebhook(ctx context.Context) []error
 		errs = append(errs, fmt.Errorf("failed to reconcile %T %s: %w", clusterRoleBinding, clusterRoleBinding.Name, err))
 	}
 
-	failFailurePolicy := admissionregistrationv1.Fail
+	ignoreFailurePolicy := admissionregistrationv1.Ignore
 	sideEffectsNone := admissionregistrationv1.SideEffectClassNone
 	matchEquivalent := admissionregistrationv1.Equivalent
 	reinvocationIfNeeded := admissionregistrationv1.IfNeededReinvocationPolicy
@@ -2607,7 +2607,7 @@ func (r *reconciler) reconcileAzureIdentityWebhook(ctx context.Context) []error
 				CABundle: []byte(r.rootCA),
 				URL:      ptr.To("https://127.0.0.1:9443/mutate-v1-pod"),
 			},
-			FailurePolicy:      &failFailurePolicy,
+			FailurePolicy:      &ignoreFailurePolicy,
 			MatchPolicy:        &matchEquivalent,
 			ReinvocationPolicy: &reinvocationIfNeeded,
 			ObjectSelector: &metav1.LabelSelector{