From 315b47a12043cc19d59c9cd9a4cc5bc1e5f00a6c Mon Sep 17 00:00:00 2001 From: Chin2691 Date: Wed, 3 Jun 2026 15:53:42 +0530 Subject: [PATCH] feat(azure): add spec-level validation for workload identity config Azure HCP clusters using WorkloadIdentities authentication can enter an opaque degraded state when `hypershift create iam azure` is skipped or fails silently. The operator reconciles credentials with empty client IDs, producing confusing errors. This adds validateWorkloadIdentityConfig to ReconcileCredentials that checks all 7 required clientID fields are present before proceeding. Surfaces a descriptive error via PlatformCredentialsFound condition listing exactly which fields are missing. Phase 2 will wire ValidAzureWorkloadIdentity as a first-class condition on HostedCluster status. Ref: RFE-9351 Co-authored-by: Cursor --- .../v1beta1/hostedcluster_conditions.go | 14 +- .../internal/platform/azure/azure.go | 80 ++++++++++ .../internal/platform/azure/azure_test.go | 148 ++++++++++++++++++ .../v1beta1/hostedcluster_conditions.go | 14 +- 4 files changed, 252 insertions(+), 4 deletions(-) diff --git a/api/hypershift/v1beta1/hostedcluster_conditions.go b/api/hypershift/v1beta1/hostedcluster_conditions.go index 665eeb4ef66..2d4fe9b083b 100644 --- a/api/hypershift/v1beta1/hostedcluster_conditions.go +++ b/api/hypershift/v1beta1/hostedcluster_conditions.go @@ -164,6 +164,13 @@ const ( // A failure here indicates that the input is invalid, or permissions are missing to use the encryption key. ValidAzureKMSConfig ConditionType = "ValidAzureKMSConfig" + // ValidAzureWorkloadIdentity indicates whether the required Azure Workload Identity + // configuration is present and valid in the HostedCluster spec. This validates that + // all mandatory managed identity client IDs are specified for self-managed Azure clusters + // using WorkloadIdentities authentication. A failure here indicates the cluster was + // created without running "hypershift create iam azure" or the spec is incomplete. + ValidAzureWorkloadIdentity ConditionType = "ValidAzureWorkloadIdentity" + // ValidGCPCredentials indicates if GCP credentials are valid and operational // for the HostedCluster. This includes service account authentication and // proper IAM permissions for CAPG controllers. @@ -293,8 +300,11 @@ const ( InvalidIAMRoleReason = "InvalidIAMRole" - InvalidAzureCredentialsReason = "InvalidAzureCredentials" - AzureErrorReason = "AzureError" + InvalidAzureCredentialsReason = "InvalidAzureCredentials" + AzureWorkloadIdentityNotConfiguredReason = "WorkloadIdentityNotConfigured" + AzureWorkloadIdentityIncompleteReason = "WorkloadIdentityIncomplete" + AzureWorkloadIdentityValidReason = "ValidWorkloadIdentityConfiguration" + AzureErrorReason = "AzureError" ExternalDNSHostNotReachableReason = "ExternalDNSHostNotReachable" diff --git a/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure.go b/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure.go index c93c3417c58..4cc384b4318 100644 --- a/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure.go +++ b/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure.go @@ -205,6 +205,7 @@ func (a Azure) CAPIProviderDeploymentSpec(hcluster *hyperv1.HostedCluster, _ *hy // For self-managed Azure with workload identity, instruct Azure SDK to use the minted SA token if azureutil.IsSelfManagedAzureWithWorkloadIdentity(hcluster.Spec.Platform.Type, hcluster.Spec.Platform.Azure) { + deploymentSpec.Template.Spec.Containers[0].Env = append( deploymentSpec.Template.Spec.Containers[0].Env, corev1.EnvVar{ @@ -281,6 +282,15 @@ func (a Azure) ReconcileCredentials(ctx context.Context, c client.Client, create // For self-managed Azure, use workload identity credentials; for managed Azure, use the existing approach if azureutil.IsSelfManagedAzureWithWorkloadIdentity(hcluster.Spec.Platform.Type, hcluster.Spec.Platform.Azure) { + // Validate workload identity configuration before proceeding. + // TODO(RFE-9351 Phase 2): Set ValidAzureWorkloadIdentity condition directly on + // hcluster.Status.Conditions instead of relying on error propagation through + // PlatformCredentialsFound. This requires either changing ReconcileCredentials to + // return []metav1.Condition or moving validation to the controller level. + if condition := validateWorkloadIdentityConfig(hcluster); condition != nil && condition.Status == metav1.ConditionFalse { + return fmt.Errorf("azure workload identity validation failed: %s", condition.Message) + } + // Add federated token file for workload identity authentication baseSecretData["azure_federated_token_file"] = []byte(path.Join(config.CloudTokenMountPath, "token")) @@ -325,6 +335,7 @@ func (a Azure) ReconcileCredentials(ctx context.Context, c client.Client, create secretData := maps.Clone(baseSecretData) // For self-managed Azure with workload identities, add the network client ID if azureutil.IsSelfManagedAzureWithWorkloadIdentity(hcluster.Spec.Platform.Type, hcluster.Spec.Platform.Azure) { + secretData["azure_client_id"] = []byte(hcluster.Spec.Platform.Azure.AzureAuthenticationConfig.WorkloadIdentities.Network.ClientID) } cloudNetworkConfigCreds.Data = secretData @@ -486,3 +497,72 @@ func reconcileKMSConfigSecret(secret *corev1.Secret, hc *hyperv1.HostedCluster) return nil } + +// validateWorkloadIdentityConfig validates that all required Azure Workload Identity +// client IDs are present in the HostedCluster spec. Returns a metav1.Condition +// reflecting the validation result. Currently the caller uses only the error path +// (condition.Status == ConditionFalse); the condition struct is retained so Phase 2 +// can wire it directly onto HostedCluster.Status.Conditions. +func validateWorkloadIdentityConfig(hcluster *hyperv1.HostedCluster) *metav1.Condition { + azureSpec := hcluster.Spec.Platform.Azure + if azureSpec == nil { + return nil + } + + authConfig := azureSpec.AzureAuthenticationConfig + if authConfig.AzureAuthenticationConfigType != hyperv1.AzureAuthenticationTypeWorkloadIdentities { + return nil + } + + wi := authConfig.WorkloadIdentities + if wi == nil { + return &metav1.Condition{ + Type: string(hyperv1.ValidAzureWorkloadIdentity), + Status: metav1.ConditionFalse, + Reason: hyperv1.AzureWorkloadIdentityNotConfiguredReason, + ObservedGeneration: hcluster.Generation, + Message: "workloadIdentities is not configured; run 'hypershift create iam azure' to create the required managed identities and federated credentials", + } + } + + var missing []string + if wi.CloudProvider.ClientID == "" { + missing = append(missing, "cloudProvider") + } + if wi.NodePoolManagement.ClientID == "" { + missing = append(missing, "nodePoolManagement") + } + if wi.Ingress.ClientID == "" { + missing = append(missing, "ingress") + } + if wi.ImageRegistry.ClientID == "" { + missing = append(missing, "imageRegistry") + } + if wi.Disk.ClientID == "" { + missing = append(missing, "disk") + } + if wi.File.ClientID == "" { + missing = append(missing, "file") + } + if wi.Network.ClientID == "" { + missing = append(missing, "network") + } + + if len(missing) > 0 { + return &metav1.Condition{ + Type: string(hyperv1.ValidAzureWorkloadIdentity), + Status: metav1.ConditionFalse, + Reason: hyperv1.AzureWorkloadIdentityIncompleteReason, + ObservedGeneration: hcluster.Generation, + Message: fmt.Sprintf("workloadIdentities is missing required clientID for: %s", strings.Join(missing, ", ")), + } + } + + return &metav1.Condition{ + Type: string(hyperv1.ValidAzureWorkloadIdentity), + Status: metav1.ConditionTrue, + Reason: hyperv1.AzureWorkloadIdentityValidReason, + ObservedGeneration: hcluster.Generation, + Message: "All required workload identity client IDs are configured", + } +} diff --git a/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure_test.go b/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure_test.go index b71468ea237..15236e0052f 100644 --- a/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure_test.go +++ b/hypershift-operator/controllers/hostedcluster/internal/platform/azure/azure_test.go @@ -260,6 +260,12 @@ func TestReconcileCredentials(t *testing.T) { name: "self-managed Azure with workload identities creates all credential secrets", managedService: "", hcluster: createTestHostedCluster(true, &hyperv1.AzureWorkloadIdentities{ + CloudProvider: hyperv1.WorkloadIdentity{ + ClientID: "cloud-provider-client-id", + }, + NodePoolManagement: hyperv1.WorkloadIdentity{ + ClientID: "nodepool-mgmt-client-id", + }, Ingress: hyperv1.WorkloadIdentity{ ClientID: "ingress-client-id", }, @@ -315,6 +321,12 @@ func TestReconcileCredentials(t *testing.T) { managedService: "", hcluster: func() *hyperv1.HostedCluster { hc := createTestHostedCluster(true, &hyperv1.AzureWorkloadIdentities{ + CloudProvider: hyperv1.WorkloadIdentity{ + ClientID: "cloud-provider-client-id", + }, + NodePoolManagement: hyperv1.WorkloadIdentity{ + ClientID: "nodepool-mgmt-client-id", + }, Ingress: hyperv1.WorkloadIdentity{ ClientID: "ingress-client-id", }, @@ -527,3 +539,139 @@ func TestReconcileKMSConfigSecret(t *testing.T) { }) } } + +func TestValidateWorkloadIdentityConfig(t *testing.T) { + tests := []struct { + name string + hcluster *hyperv1.HostedCluster + expectNil bool + expectStatus metav1.ConditionStatus + expectReason string + expectContains string + }{ + { + name: "non-Azure platform returns nil", + hcluster: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AWSPlatform, + }, + }, + }, + expectNil: true, + }, + { + name: "Azure with ManagedIdentities auth returns nil", + hcluster: &hyperv1.HostedCluster{ + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AzurePlatform, + Azure: &hyperv1.AzurePlatformSpec{ + AzureAuthenticationConfig: hyperv1.AzureAuthenticationConfiguration{ + AzureAuthenticationConfigType: hyperv1.AzureAuthenticationTypeManagedIdentities, + }, + }, + }, + }, + }, + expectNil: true, + }, + { + name: "WorkloadIdentities auth with nil WorkloadIdentities returns not configured", + hcluster: &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{Generation: 3}, + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AzurePlatform, + Azure: &hyperv1.AzurePlatformSpec{ + AzureAuthenticationConfig: hyperv1.AzureAuthenticationConfiguration{ + AzureAuthenticationConfigType: hyperv1.AzureAuthenticationTypeWorkloadIdentities, + }, + }, + }, + }, + }, + expectNil: false, + expectStatus: metav1.ConditionFalse, + expectReason: hyperv1.AzureWorkloadIdentityNotConfiguredReason, + expectContains: "not configured", + }, + { + name: "WorkloadIdentities with missing clientIDs returns incomplete", + hcluster: &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{Generation: 5}, + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AzurePlatform, + Azure: &hyperv1.AzurePlatformSpec{ + AzureAuthenticationConfig: hyperv1.AzureAuthenticationConfiguration{ + AzureAuthenticationConfigType: hyperv1.AzureAuthenticationTypeWorkloadIdentities, + WorkloadIdentities: &hyperv1.AzureWorkloadIdentities{ + CloudProvider: hyperv1.WorkloadIdentity{ClientID: "client-1"}, + NodePoolManagement: hyperv1.WorkloadIdentity{ClientID: "client-2"}, + Ingress: hyperv1.WorkloadIdentity{ClientID: ""}, + ImageRegistry: hyperv1.WorkloadIdentity{ClientID: "client-4"}, + Disk: hyperv1.WorkloadIdentity{ClientID: "client-5"}, + File: hyperv1.WorkloadIdentity{ClientID: ""}, + Network: hyperv1.WorkloadIdentity{ClientID: "client-7"}, + }, + }, + }, + }, + }, + }, + expectNil: false, + expectStatus: metav1.ConditionFalse, + expectReason: hyperv1.AzureWorkloadIdentityIncompleteReason, + expectContains: "ingress, file", + }, + { + name: "WorkloadIdentities with all clientIDs returns valid", + hcluster: &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{Generation: 7}, + Spec: hyperv1.HostedClusterSpec{ + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AzurePlatform, + Azure: &hyperv1.AzurePlatformSpec{ + AzureAuthenticationConfig: hyperv1.AzureAuthenticationConfiguration{ + AzureAuthenticationConfigType: hyperv1.AzureAuthenticationTypeWorkloadIdentities, + WorkloadIdentities: &hyperv1.AzureWorkloadIdentities{ + CloudProvider: hyperv1.WorkloadIdentity{ClientID: "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"}, + NodePoolManagement: hyperv1.WorkloadIdentity{ClientID: "aaaaaaaa-bbbb-cccc-dddd-ffffffffffff"}, + Ingress: hyperv1.WorkloadIdentity{ClientID: "aaaaaaaa-bbbb-cccc-dddd-111111111111"}, + ImageRegistry: hyperv1.WorkloadIdentity{ClientID: "aaaaaaaa-bbbb-cccc-dddd-222222222222"}, + Disk: hyperv1.WorkloadIdentity{ClientID: "aaaaaaaa-bbbb-cccc-dddd-333333333333"}, + File: hyperv1.WorkloadIdentity{ClientID: "aaaaaaaa-bbbb-cccc-dddd-444444444444"}, + Network: hyperv1.WorkloadIdentity{ClientID: "aaaaaaaa-bbbb-cccc-dddd-555555555555"}, + }, + }, + }, + }, + }, + }, + expectNil: false, + expectStatus: metav1.ConditionTrue, + expectReason: hyperv1.AzureWorkloadIdentityValidReason, + expectContains: "All required", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + condition := validateWorkloadIdentityConfig(tc.hcluster) + + if tc.expectNil { + g.Expect(condition).To(BeNil()) + return + } + + g.Expect(condition).NotTo(BeNil()) + g.Expect(condition.Type).To(Equal(string(hyperv1.ValidAzureWorkloadIdentity))) + g.Expect(condition.Status).To(Equal(tc.expectStatus)) + g.Expect(condition.Reason).To(Equal(tc.expectReason)) + g.Expect(condition.Message).To(ContainSubstring(tc.expectContains)) + g.Expect(condition.ObservedGeneration).To(Equal(tc.hcluster.Generation)) + }) + } +} diff --git a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go index 665eeb4ef66..2d4fe9b083b 100644 --- a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go +++ b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go @@ -164,6 +164,13 @@ const ( // A failure here indicates that the input is invalid, or permissions are missing to use the encryption key. ValidAzureKMSConfig ConditionType = "ValidAzureKMSConfig" + // ValidAzureWorkloadIdentity indicates whether the required Azure Workload Identity + // configuration is present and valid in the HostedCluster spec. This validates that + // all mandatory managed identity client IDs are specified for self-managed Azure clusters + // using WorkloadIdentities authentication. A failure here indicates the cluster was + // created without running "hypershift create iam azure" or the spec is incomplete. + ValidAzureWorkloadIdentity ConditionType = "ValidAzureWorkloadIdentity" + // ValidGCPCredentials indicates if GCP credentials are valid and operational // for the HostedCluster. This includes service account authentication and // proper IAM permissions for CAPG controllers. @@ -293,8 +300,11 @@ const ( InvalidIAMRoleReason = "InvalidIAMRole" - InvalidAzureCredentialsReason = "InvalidAzureCredentials" - AzureErrorReason = "AzureError" + InvalidAzureCredentialsReason = "InvalidAzureCredentials" + AzureWorkloadIdentityNotConfiguredReason = "WorkloadIdentityNotConfigured" + AzureWorkloadIdentityIncompleteReason = "WorkloadIdentityIncomplete" + AzureWorkloadIdentityValidReason = "ValidWorkloadIdentityConfiguration" + AzureErrorReason = "AzureError" ExternalDNSHostNotReachableReason = "ExternalDNSHostNotReachable"