From e082536bc8fa57e670713872684134a25507889f Mon Sep 17 00:00:00 2001 From: Chin2691 Date: Tue, 19 May 2026 17:29:11 +0530 Subject: [PATCH 1/3] fix: validate duplicate Nutanix failureDomain names and topology Adds two checks to Nutanix failure domain validation: 1. Duplicate name detection: rejects configurations where two failure domains share the same name, which previously went unvalidated. 2. Duplicate topology detection: rejects failure domains that have identical Prism Element UUID and subnet UUIDs. Copy-pasted failure domains that differ only in name provide no additional fault tolerance and can cause subtle scheduling issues. Bug: https://redhat.atlassian.net/browse/OCPBUGS-86073 Co-authored-by: Cursor --- pkg/types/nutanix/validation/platform.go | 30 +++++++++- pkg/types/nutanix/validation/platform_test.go | 59 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/pkg/types/nutanix/validation/platform.go b/pkg/types/nutanix/validation/platform.go index b981d3555a..73cdad526e 100644 --- a/pkg/types/nutanix/validation/platform.go +++ b/pkg/types/nutanix/validation/platform.go @@ -3,6 +3,8 @@ package validation import ( "fmt" "regexp" + "sort" + "strings" "k8s.io/apimachinery/pkg/util/validation/field" @@ -103,11 +105,28 @@ func ValidatePlatform(p *nutanix.Platform, fldPath *field.Path, c *types.Install if err != nil { allErrs = append(allErrs, field.InternalError(fldPath.Child("failureDomain", "name"), fmt.Errorf("fail to compile the pattern %q: %w", pattern, err))) } else { - for _, fd := range p.FailureDomains { + fdNames := make(map[string]int) + fdTopologies := make(map[string]string) + + for idx, fd := range p.FailureDomains { if !rexp.MatchString(fd.Name) { allErrs = append(allErrs, field.Invalid(fldPath.Child("failureDomain", "name"), fd.Name, fmt.Sprintf("failureDomain name should match the pattern %q.", pattern))) } + if prevIdx, exists := fdNames[fd.Name]; exists { + allErrs = append(allErrs, field.Duplicate(fldPath.Child("failureDomains").Index(idx).Child("name"), fmt.Sprintf("failure domain name %q is already used by failureDomains[%d]", fd.Name, prevIdx))) + } else { + fdNames[fd.Name] = idx + } + + topoKey := nutanixFailureDomainTopologyKey(fd) + if prevName, exists := fdTopologies[topoKey]; exists { + allErrs = append(allErrs, field.Invalid(fldPath.Child("failureDomains").Index(idx), fd.Name, + fmt.Sprintf("failure domain %q has identical topology (same prismElement and subnets) as %q; this provides no additional fault tolerance", fd.Name, prevName))) + } else { + fdTopologies[topoKey] = fd.Name + } + if fd.PrismElement.UUID == "" { allErrs = append(allErrs, field.Required(fldPath.Child("failureDomain", "prismElement", "uuid"), "failureDomain prismElement uuid cannot be empty")) } @@ -153,6 +172,15 @@ func validateLoadBalancer(lbType configv1.PlatformLoadBalancerType) bool { } } +// nutanixFailureDomainTopologyKey builds a comparable key from the infrastructure-defining +// fields of a failure domain: Prism Element UUID and sorted subnet UUIDs. +func nutanixFailureDomainTopologyKey(fd nutanix.FailureDomain) string { + subnets := make([]string, len(fd.SubnetUUIDs)) + copy(subnets, fd.SubnetUUIDs) + sort.Strings(subnets) + return fmt.Sprintf("pe=%s;subnets=%s", fd.PrismElement.UUID, strings.Join(subnets, ",")) +} + // validateSubnets validates the input subnetUUIDs meet the configuration requirements. func validateSubnets(fldPath *field.Path, subnetUUIDs []string) field.ErrorList { var errs field.ErrorList diff --git a/pkg/types/nutanix/validation/platform_test.go b/pkg/types/nutanix/validation/platform_test.go index 7ac9019697..2e07ddb773 100644 --- a/pkg/types/nutanix/validation/platform_test.go +++ b/pkg/types/nutanix/validation/platform_test.go @@ -409,6 +409,65 @@ func TestValidatePlatform(t *testing.T) { return p }(), }, + { + name: "failureDomain with duplicate name", + platform: func() *nutanix.Platform { + p := validPlatform() + p.FailureDomains = []nutanix.FailureDomain{ + { + Name: "fd-1", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid-1", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe-1", Port: 9440}}, + SubnetUUIDs: []string{"fd-subnet-uuid-1"}, + }, + { + Name: "fd-1", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid-2", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe-2", Port: 9440}}, + SubnetUUIDs: []string{"fd-subnet-uuid-2"}, + }, + } + return p + }(), + expectedError: `test-path\.failureDomains\[1\]\.name: Duplicate value: "failure domain name \\"fd-1\\" is already used by failureDomains\[0\]"`, + }, + { + name: "failureDomain with duplicate topology same prismElement and subnet", + platform: func() *nutanix.Platform { + p := validPlatform() + p.FailureDomains = []nutanix.FailureDomain{ + { + Name: "fd-1", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe", Port: 9440}}, + SubnetUUIDs: []string{"fd-subnet-uuid"}, + }, + { + Name: "fd-2", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe", Port: 9440}}, + SubnetUUIDs: []string{"fd-subnet-uuid"}, + }, + } + return p + }(), + expectedError: `test-path\.failureDomains\[1\]: Invalid value: "fd-2": failure domain "fd-2" has identical topology \(same prismElement and subnets\) as "fd-1"; this provides no additional fault tolerance`, + }, + { + name: "valid failureDomain with different prismElements", + platform: func() *nutanix.Platform { + p := validPlatform() + p.FailureDomains = []nutanix.FailureDomain{ + { + Name: "fd-1", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid-1", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe-1", Port: 9440}}, + SubnetUUIDs: []string{"fd-subnet-uuid"}, + }, + { + Name: "fd-2", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid-2", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe-2", Port: 9440}}, + SubnetUUIDs: []string{"fd-subnet-uuid"}, + }, + } + return p + }(), + }, { name: "valid failureDomain with multiple subnets for multi-NIC", platform: func() *nutanix.Platform { From e540fe7b4baeda71e6c14e27221877e5bcbbcaaa Mon Sep 17 00:00:00 2001 From: Chin2691 Date: Wed, 20 May 2026 08:29:40 +0530 Subject: [PATCH 2/3] fix: reorder topology check after required-field validation - Move duplicate topology detection after PrismElement UUID and subnet validation so users see actionable "required field" errors instead of misleading "identical topology" when fields are simply empty - Use \x00 as subnet separator to eliminate join ambiguity - Add regression test confirming subnet-order invariance: identical subnet sets in different order are correctly detected as duplicates Bug: https://redhat.atlassian.net/browse/OCPBUGS-86133 Co-authored-by: Cursor --- pkg/types/nutanix/validation/platform.go | 35 +++++++++++-------- pkg/types/nutanix/validation/platform_test.go | 20 +++++++++++ 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/pkg/types/nutanix/validation/platform.go b/pkg/types/nutanix/validation/platform.go index 73cdad526e..bf9ecdc7f8 100644 --- a/pkg/types/nutanix/validation/platform.go +++ b/pkg/types/nutanix/validation/platform.go @@ -113,12 +113,25 @@ func ValidatePlatform(p *nutanix.Platform, fldPath *field.Path, c *types.Install allErrs = append(allErrs, field.Invalid(fldPath.Child("failureDomain", "name"), fd.Name, fmt.Sprintf("failureDomain name should match the pattern %q.", pattern))) } - if prevIdx, exists := fdNames[fd.Name]; exists { - allErrs = append(allErrs, field.Duplicate(fldPath.Child("failureDomains").Index(idx).Child("name"), fmt.Sprintf("failure domain name %q is already used by failureDomains[%d]", fd.Name, prevIdx))) - } else { - fdNames[fd.Name] = idx - } + if prevIdx, exists := fdNames[fd.Name]; exists { + allErrs = append(allErrs, field.Duplicate(fldPath.Child("failureDomains").Index(idx).Child("name"), fmt.Sprintf("failure domain name %q is already used by failureDomains[%d]", fd.Name, prevIdx))) + } else { + fdNames[fd.Name] = idx + } + if fd.PrismElement.UUID == "" { + allErrs = append(allErrs, field.Required(fldPath.Child("failureDomain", "prismElement", "uuid"), "failureDomain prismElement uuid cannot be empty")) + } + + // validate subnets configuration + if errs := validateSubnets(fldPath.Child("failureDomain", "subnetUUIDs"), fd.SubnetUUIDs); len(errs) > 0 { + allErrs = append(allErrs, errs...) + } + + // Only check for duplicate topology after basic required-field validation, + // so users see actionable "required field" errors instead of misleading + // "identical topology" when fields are simply empty. + if fd.PrismElement.UUID != "" && len(fd.SubnetUUIDs) > 0 { topoKey := nutanixFailureDomainTopologyKey(fd) if prevName, exists := fdTopologies[topoKey]; exists { allErrs = append(allErrs, field.Invalid(fldPath.Child("failureDomains").Index(idx), fd.Name, @@ -126,15 +139,7 @@ func ValidatePlatform(p *nutanix.Platform, fldPath *field.Path, c *types.Install } else { fdTopologies[topoKey] = fd.Name } - - if fd.PrismElement.UUID == "" { - allErrs = append(allErrs, field.Required(fldPath.Child("failureDomain", "prismElement", "uuid"), "failureDomain prismElement uuid cannot be empty")) - } - - // validate subnets configuration - if errs := validateSubnets(fldPath.Child("failureDomain", "subnetUUIDs"), fd.SubnetUUIDs); len(errs) > 0 { - allErrs = append(allErrs, errs...) - } + } for _, sc := range fd.StorageContainers { if sc.ReferenceName == "" { @@ -178,7 +183,7 @@ func nutanixFailureDomainTopologyKey(fd nutanix.FailureDomain) string { subnets := make([]string, len(fd.SubnetUUIDs)) copy(subnets, fd.SubnetUUIDs) sort.Strings(subnets) - return fmt.Sprintf("pe=%s;subnets=%s", fd.PrismElement.UUID, strings.Join(subnets, ",")) + return fmt.Sprintf("pe=%s;subnets=%s", fd.PrismElement.UUID, strings.Join(subnets, "\x00")) } // validateSubnets validates the input subnetUUIDs meet the configuration requirements. diff --git a/pkg/types/nutanix/validation/platform_test.go b/pkg/types/nutanix/validation/platform_test.go index 2e07ddb773..27adc2f185 100644 --- a/pkg/types/nutanix/validation/platform_test.go +++ b/pkg/types/nutanix/validation/platform_test.go @@ -468,6 +468,26 @@ func TestValidatePlatform(t *testing.T) { return p }(), }, + { + name: "failureDomain duplicate topology with same subnets in different order", + platform: func() *nutanix.Platform { + p := validPlatform() + p.FailureDomains = []nutanix.FailureDomain{ + { + Name: "fd-1", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe", Port: 9440}}, + SubnetUUIDs: []string{"subnet-a", "subnet-b"}, + }, + { + Name: "fd-2", + PrismElement: nutanix.PrismElement{UUID: "fd-pe-uuid", Endpoint: nutanix.PrismEndpoint{Address: "fd-pe", Port: 9440}}, + SubnetUUIDs: []string{"subnet-b", "subnet-a"}, + }, + } + return p + }(), + expectedError: `test-path\.failureDomains\[1\]: Invalid value: "fd-2": failure domain "fd-2" has identical topology \(same prismElement and subnets\) as "fd-1"; this provides no additional fault tolerance`, + }, { name: "valid failureDomain with multiple subnets for multi-NIC", platform: func() *nutanix.Platform { From f92a064003bf98c1e63cc19bca52fe7e1d06439a Mon Sep 17 00:00:00 2001 From: Chin2691 Date: Tue, 2 Jun 2026 11:42:33 +0530 Subject: [PATCH 3/3] fix: correct indentation in duplicate failureDomain validation The duplicate name check and topology validation blocks were indented at 3 tabs instead of 4 inside the for-range loop, causing gofmt and golint CI failures. Co-authored-by: Cursor --- pkg/types/nutanix/validation/platform.go | 46 ++++++++++++------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pkg/types/nutanix/validation/platform.go b/pkg/types/nutanix/validation/platform.go index bf9ecdc7f8..6009d0939c 100644 --- a/pkg/types/nutanix/validation/platform.go +++ b/pkg/types/nutanix/validation/platform.go @@ -113,33 +113,33 @@ func ValidatePlatform(p *nutanix.Platform, fldPath *field.Path, c *types.Install allErrs = append(allErrs, field.Invalid(fldPath.Child("failureDomain", "name"), fd.Name, fmt.Sprintf("failureDomain name should match the pattern %q.", pattern))) } - if prevIdx, exists := fdNames[fd.Name]; exists { - allErrs = append(allErrs, field.Duplicate(fldPath.Child("failureDomains").Index(idx).Child("name"), fmt.Sprintf("failure domain name %q is already used by failureDomains[%d]", fd.Name, prevIdx))) - } else { - fdNames[fd.Name] = idx - } + if prevIdx, exists := fdNames[fd.Name]; exists { + allErrs = append(allErrs, field.Duplicate(fldPath.Child("failureDomains").Index(idx).Child("name"), fmt.Sprintf("failure domain name %q is already used by failureDomains[%d]", fd.Name, prevIdx))) + } else { + fdNames[fd.Name] = idx + } - if fd.PrismElement.UUID == "" { - allErrs = append(allErrs, field.Required(fldPath.Child("failureDomain", "prismElement", "uuid"), "failureDomain prismElement uuid cannot be empty")) - } + if fd.PrismElement.UUID == "" { + allErrs = append(allErrs, field.Required(fldPath.Child("failureDomain", "prismElement", "uuid"), "failureDomain prismElement uuid cannot be empty")) + } - // validate subnets configuration - if errs := validateSubnets(fldPath.Child("failureDomain", "subnetUUIDs"), fd.SubnetUUIDs); len(errs) > 0 { - allErrs = append(allErrs, errs...) - } + // validate subnets configuration + if errs := validateSubnets(fldPath.Child("failureDomain", "subnetUUIDs"), fd.SubnetUUIDs); len(errs) > 0 { + allErrs = append(allErrs, errs...) + } - // Only check for duplicate topology after basic required-field validation, - // so users see actionable "required field" errors instead of misleading - // "identical topology" when fields are simply empty. - if fd.PrismElement.UUID != "" && len(fd.SubnetUUIDs) > 0 { - topoKey := nutanixFailureDomainTopologyKey(fd) - if prevName, exists := fdTopologies[topoKey]; exists { - allErrs = append(allErrs, field.Invalid(fldPath.Child("failureDomains").Index(idx), fd.Name, - fmt.Sprintf("failure domain %q has identical topology (same prismElement and subnets) as %q; this provides no additional fault tolerance", fd.Name, prevName))) - } else { - fdTopologies[topoKey] = fd.Name + // Only check for duplicate topology after basic required-field validation, + // so users see actionable "required field" errors instead of misleading + // "identical topology" when fields are simply empty. + if fd.PrismElement.UUID != "" && len(fd.SubnetUUIDs) > 0 { + topoKey := nutanixFailureDomainTopologyKey(fd) + if prevName, exists := fdTopologies[topoKey]; exists { + allErrs = append(allErrs, field.Invalid(fldPath.Child("failureDomains").Index(idx), fd.Name, + fmt.Sprintf("failure domain %q has identical topology (same prismElement and subnets) as %q; this provides no additional fault tolerance", fd.Name, prevName))) + } else { + fdTopologies[topoKey] = fd.Name + } } - } for _, sc := range fd.StorageContainers { if sc.ReferenceName == "" {