diff --git a/pkg/types/vsphere/validation/platform.go b/pkg/types/vsphere/validation/platform.go index 75209d8bc0..786559cbe9 100644 --- a/pkg/types/vsphere/validation/platform.go +++ b/pkg/types/vsphere/validation/platform.go @@ -3,7 +3,7 @@ package validation import ( "fmt" "net" - "path/filepath" + "path" "regexp" "strings" @@ -163,6 +163,11 @@ func validateFailureDomains(p *vsphere.Platform, platformFldPath *field.Path, fl var associatedVCenter *vsphere.VCenter zoneNames := make(map[string]string) + type topologyEntry struct { + name string + networks []string + } + fdTopologies := make(map[vsphereTopologyKey][]topologyEntry) for index, failureDomain := range p.FailureDomains { if regionName, ok := zoneNames[failureDomain.Zone]; !ok { @@ -171,6 +176,20 @@ func validateFailureDomains(p *vsphere.Platform, platformFldPath *field.Path, fl allErrs = append(allErrs, field.Invalid(fldPath.Child("zone"), failureDomain.Zone, fmt.Sprintf("cannot be used more than once for the failure domain region %q", failureDomain.Region))) } + topoKey := normalizedTopologyKey(failureDomain) + duplicate := false + for _, entry := range fdTopologies[topoKey] { + if slices.Equal(failureDomain.Topology.Networks, entry.networks) { + allErrs = append(allErrs, field.Invalid(fldPath.Index(index), failureDomain.Name, + fmt.Sprintf("failure domain %q has identical topology (same server, datacenter, computeCluster, datastore, networks, resourcePool, hostGroup) as %q; this provides no additional fault tolerance", failureDomain.Name, entry.name))) + duplicate = true + break + } + } + if !duplicate { + fdTopologies[topoKey] = append(fdTopologies[topoKey], topologyEntry{name: failureDomain.Name, networks: failureDomain.Topology.Networks}) + } + if failureDomain.ZoneType == "" && failureDomain.RegionType == "" { logrus.Debug("using the defaults regionType is Datacenter and zoneType is ComputeCluster") } @@ -249,7 +268,7 @@ func validateFailureDomains(p *vsphere.Platform, platformFldPath *field.Path, fl if !strings.Contains(failureDomain.Topology.Datastore, failureDomain.Topology.Datacenter) { return append(allErrs, field.Invalid(topologyFld.Child("datastore"), failureDomain.Topology.Datastore, "the datastore defined does not exist in the correct datacenter")) } - p.FailureDomains[index].Topology.Datastore = filepath.Clean(p.FailureDomains[index].Topology.Datastore) + p.FailureDomains[index].Topology.Datastore = path.Clean(p.FailureDomains[index].Topology.Datastore) } if len(failureDomain.Topology.TagIDs) > 10 { @@ -304,7 +323,7 @@ func validateFailureDomains(p *vsphere.Platform, platformFldPath *field.Path, fl if len(failureDomain.Topology.Datacenter) != 0 && !strings.Contains(failureDomain.Topology.Datacenter, datacenterName) { return append(allErrs, field.Invalid(topologyFld.Child("computeCluster"), computeCluster, fmt.Sprintf("compute cluster must be in datacenter %s", failureDomain.Topology.Datacenter))) } - p.FailureDomains[index].Topology.ComputeCluster = filepath.Clean(p.FailureDomains[index].Topology.ComputeCluster) + p.FailureDomains[index].Topology.ComputeCluster = path.Clean(p.FailureDomains[index].Topology.ComputeCluster) } if len(failureDomain.Topology.ResourcePool) != 0 { @@ -323,7 +342,7 @@ func validateFailureDomains(p *vsphere.Platform, platformFldPath *field.Path, fl return append(allErrs, field.Invalid(topologyFld.Child("resourcePool"), resourcePool, fmt.Sprintf("resource pool must be in compute cluster %s", failureDomain.Topology.ComputeCluster))) } - p.FailureDomains[index].Topology.ResourcePool = filepath.Clean(p.FailureDomains[index].Topology.ResourcePool) + p.FailureDomains[index].Topology.ResourcePool = path.Clean(p.FailureDomains[index].Topology.ResourcePool) } // Validate that template and clusterOSImage are mutually exclusive @@ -333,13 +352,55 @@ func validateFailureDomains(p *vsphere.Platform, platformFldPath *field.Path, fl } if len(failureDomain.Topology.Template) > 0 { - p.FailureDomains[index].Topology.Template = filepath.Clean(p.FailureDomains[index].Topology.Template) + p.FailureDomains[index].Topology.Template = path.Clean(p.FailureDomains[index].Topology.Template) } } return allErrs } +// vsphereTopologyKey is a comparable struct representing the scalar physical +// infrastructure fields of a failure domain. Used as a map key for first-pass +// dedup. Networks are compared separately (order-sensitive slice comparison) +// because Go map keys cannot contain slices. +// +// Included fields (define physical placement): +// - server, datacenter, computeCluster, datastore, resourcePool, hostGroup +// +// Excluded fields (identity/metadata, not infrastructure): +// - Name, Region, Zone, RegionType, ZoneType (scheduling labels) +// - Folder, Template, TagIDs (VM metadata, not fault-zone identity) +// - Networks (compared separately via slices.Equal, order matters) +type vsphereTopologyKey struct { + server string + datacenter string + computeCluster string + datastore string + resourcePool string + hostGroup string +} + +// normalizedTopologyKey builds a comparable struct key from a failure domain. +// Inventory paths are canonicalized with path.Clean (vSphere paths are URL-style, +// always use forward slashes). +func normalizedTopologyKey(fd vsphere.FailureDomain) vsphereTopologyKey { + normalizePath := func(p string) string { + if p == "" { + return "" + } + return path.Clean(p) + } + + return vsphereTopologyKey{ + server: fd.Server, + datacenter: fd.Topology.Datacenter, + computeCluster: normalizePath(fd.Topology.ComputeCluster), + datastore: normalizePath(fd.Topology.Datastore), + resourcePool: normalizePath(fd.Topology.ResourcePool), + hostGroup: fd.Topology.HostGroup, + } +} + // validateDiskType checks that the specified diskType is valid. func validateDiskType(p *vsphere.Platform, fldPath *field.Path) field.ErrorList { allErrs := field.ErrorList{} diff --git a/pkg/types/vsphere/validation/platform_test.go b/pkg/types/vsphere/validation/platform_test.go index 099188dd99..00fbcb4892 100644 --- a/pkg/types/vsphere/validation/platform_test.go +++ b/pkg/types/vsphere/validation/platform_test.go @@ -378,7 +378,7 @@ func TestValidatePlatform(t *testing.T) { for i := range p.FailureDomains { p.FailureDomains[i].Topology.ComputeCluster = "/test-datacenter/host/HostClusterFolder/test-cluster" - p.FailureDomains[i].Topology.ResourcePool = "/test-datacenter/host/HostClusterFolder/test-cluster/Resources/test-resourcepool" + p.FailureDomains[i].Topology.ResourcePool = fmt.Sprintf("/test-datacenter/host/HostClusterFolder/test-cluster/Resources/test-resourcepool-%d", i) p.FailureDomains[i].Topology.Datastore = "/test-datacenter/datastore/StorageFolder/test-datastore" p.FailureDomains[i].Topology.Folder = "/test-datacenter/vm/VMFolder/test-folder" } @@ -552,6 +552,145 @@ func TestValidatePlatform(t *testing.T) { }(), expectedError: `test-path\.failureDomains\.name\[1\]: Duplicate value: "test-east-1a"`, }, + { + name: "Multi-zone platform failureDomain duplicate topology", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Server = p.FailureDomains[0].Server + return p + }(), + expectedError: `test-path\.failureDomains\[1\]: Invalid value: "test-east-2a": failure domain "test-east-2a" has identical topology .* as "test-east-1a"; this provides no additional fault tolerance`, + }, + { + name: "Valid HostGroup failure domains with same topology but different HostGroups", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[0].RegionType = vsphere.ComputeClusterFailureDomain + p.FailureDomains[0].ZoneType = vsphere.HostGroupFailureDomain + p.FailureDomains[0].Topology.HostGroup = "host-group-a" + p.FailureDomains[0].Topology.ResourcePool = "/test-datacenter/host/test-cluster/Resources/test-resourcepool" + + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].RegionType = vsphere.ComputeClusterFailureDomain + p.FailureDomains[1].ZoneType = vsphere.HostGroupFailureDomain + p.FailureDomains[1].Topology.HostGroup = "host-group-b" + return p + }(), + }, + { + name: "Duplicate topology with same networks same order (multi-net)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[0].Topology.Networks = []string{"net-a", "net-b"} + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Server = p.FailureDomains[0].Server + return p + }(), + expectedError: `test-path\.failureDomains\[1\]: Invalid value: "test-east-2a": failure domain "test-east-2a" has identical topology .* as "test-east-1a"; this provides no additional fault tolerance`, + }, + { + name: "Valid: same networks different order (multi-net, order matters)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[0].Topology.Networks = []string{"net-a", "net-b"} + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Topology.Networks = []string{"net-b", "net-a"} + return p + }(), + }, + { + name: "Valid: different networks (multi-net)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[0].Topology.Networks = []string{"net-a", "net-b"} + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Topology.Networks = []string{"net-a", "net-c"} + return p + }(), + }, + { + name: "Valid: subset networks (different length)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[0].Topology.Networks = []string{"net-a"} + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Topology.Networks = []string{"net-a", "net-b"} + return p + }(), + }, + { + name: "Duplicate topology with path normalization (trailing slash)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Topology.ComputeCluster = "/test-datacenter/host/test-cluster/" + return p + }(), + expectedError: `test-path\.failureDomains\[1\]: Invalid value: "test-east-2a": failure domain "test-east-2a" has identical topology .* as "test-east-1a"; this provides no additional fault tolerance`, + }, + { + name: "Duplicate topology with path normalization (double slash)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Topology.Datastore = "/test-datacenter/datastore//test-datastore" + return p + }(), + expectedError: `test-path\.failureDomains\[1\]: Invalid value: "test-east-2a": failure domain "test-east-2a" has identical topology .* as "test-east-1a"; this provides no additional fault tolerance`, + }, + { + name: "Valid: different datastore (not duplicate)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Topology.Datastore = "/test-datacenter/datastore/other-datastore" + return p + }(), + }, + { + name: "Valid: different server (not duplicate)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Server = "other-vcenter" + return p + }(), + }, + { + name: "Duplicate topology HostGroup zonal same hostGroup", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[0].RegionType = vsphere.ComputeClusterFailureDomain + p.FailureDomains[0].ZoneType = vsphere.HostGroupFailureDomain + p.FailureDomains[0].Topology.HostGroup = "host-group-a" + p.FailureDomains[0].Topology.ResourcePool = "/test-datacenter/host/test-cluster/Resources/test-resourcepool" + + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].RegionType = vsphere.ComputeClusterFailureDomain + p.FailureDomains[1].ZoneType = vsphere.HostGroupFailureDomain + return p + }(), + expectedError: `test-path\.failureDomains\[1\]: Invalid value: "test-east-2a": failure domain "test-east-2a" has identical topology .* as "test-east-1a"; this provides no additional fault tolerance`, + }, + { + name: "Valid: different resourcePool (not duplicate)", + platform: func() *vsphere.Platform { + p := validPlatform() + p.FailureDomains[1].Server = p.FailureDomains[0].Server + p.FailureDomains[1].Topology = p.FailureDomains[0].Topology + p.FailureDomains[1].Topology.ResourcePool = "/test-datacenter/host/test-cluster/Resources/other-pool" + return p + }(), + }, { name: "Multi-zone platform failureDomain zone missing tag category", platform: func() *vsphere.Platform {