From c065c5b74dcf66621d20235b1835ec5e0e83a326 Mon Sep 17 00:00:00 2001 From: Abhijeet Sadawarte Date: Tue, 2 Jun 2026 15:26:07 +0530 Subject: [PATCH] ibmcloud: add preflight quota validation for VPC IPI installations Implement PlatformQuotaCheck for IBM Cloud VPC following the existing AWS/GCP pattern. Checks floating IP, security group, load balancer, and instance counts against known default limits before creating any infrastructure. During 4.22 rc5 testing, an IBM Cloud IPI install failed 20 minutes in when the floating IP quota (40/40) was exhausted. The installer had already created instances, load balancers, security groups, and uploaded the RHCOS image before discovering the limit. All resources were orphaned and required manual cleanup. With this change, the installer validates resource availability before provisioning and fails fast with a clear quota error. Assisted-by: Claude Code RFE-9374 --- pkg/asset/installconfig/ibmcloud/client.go | 69 +++++++++++++++ pkg/asset/quota/ibmcloud/OWNERS | 7 ++ pkg/asset/quota/ibmcloud/ibmcloud.go | 85 +++++++++++++++++++ pkg/asset/quota/quota.go | 21 ++++- pkg/quota/ibmcloud/OWNERS | 7 ++ pkg/quota/ibmcloud/ibmcloud.go | 97 ++++++++++++++++++++++ pkg/quota/ibmcloud/ibmcloud_test.go | 60 +++++++++++++ 7 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 pkg/asset/quota/ibmcloud/OWNERS create mode 100644 pkg/asset/quota/ibmcloud/ibmcloud.go create mode 100644 pkg/quota/ibmcloud/OWNERS create mode 100644 pkg/quota/ibmcloud/ibmcloud.go create mode 100644 pkg/quota/ibmcloud/ibmcloud_test.go diff --git a/pkg/asset/installconfig/ibmcloud/client.go b/pkg/asset/installconfig/ibmcloud/client.go index 3b31fb95821..fb7fb3423db 100644 --- a/pkg/asset/installconfig/ibmcloud/client.go +++ b/pkg/asset/installconfig/ibmcloud/client.go @@ -1528,6 +1528,75 @@ func (c *Client) getKeyServiceAPI(crn ibmcrn.CRN) (*kpclient.Client, error) { return kpclient.New(clientConfig, kpclient.DefaultTransport()) } +// ListFloatingIPs lists all floating IPs in the region. +func (c *Client) ListFloatingIPs(ctx context.Context, region string) ([]vpcv1.FloatingIP, error) { + localContext, cancel := context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + + if err := c.SetVPCServiceURLForRegion(localContext, region); err != nil { + return nil, fmt.Errorf("failed to set vpc api service url: %w", err) + } + + pager, err := c.vpcAPI.NewFloatingIpsPager(c.vpcAPI.NewListFloatingIpsOptions()) + if err != nil { + return nil, fmt.Errorf("failed to create floating ips pager: %w", err) + } + return pager.GetAllWithContext(localContext) +} + +// ListSecurityGroups lists security groups in the region. +// If vpcID is non-empty, results are scoped to that VPC. +func (c *Client) ListSecurityGroups(ctx context.Context, region string, vpcID string) ([]vpcv1.SecurityGroup, error) { + localContext, cancel := context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + + if err := c.SetVPCServiceURLForRegion(localContext, region); err != nil { + return nil, fmt.Errorf("failed to set vpc api service url: %w", err) + } + + options := c.vpcAPI.NewListSecurityGroupsOptions() + if vpcID != "" { + options.SetVPCID(vpcID) + } + pager, err := c.vpcAPI.NewSecurityGroupsPager(options) + if err != nil { + return nil, fmt.Errorf("failed to create security groups pager: %w", err) + } + return pager.GetAllWithContext(localContext) +} + +// ListLoadBalancers lists all load balancers in the region. +func (c *Client) ListLoadBalancers(ctx context.Context, region string) ([]vpcv1.LoadBalancer, error) { + localContext, cancel := context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + + if err := c.SetVPCServiceURLForRegion(localContext, region); err != nil { + return nil, fmt.Errorf("failed to set vpc api service url: %w", err) + } + + pager, err := c.vpcAPI.NewLoadBalancersPager(c.vpcAPI.NewListLoadBalancersOptions()) + if err != nil { + return nil, fmt.Errorf("failed to create load balancers pager: %w", err) + } + return pager.GetAllWithContext(localContext) +} + +// ListInstances lists all instances in the region. +func (c *Client) ListInstances(ctx context.Context, region string) ([]vpcv1.Instance, error) { + localContext, cancel := context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + + if err := c.SetVPCServiceURLForRegion(localContext, region); err != nil { + return nil, fmt.Errorf("failed to set vpc api service url: %w", err) + } + + pager, err := c.vpcAPI.NewInstancesPager(c.vpcAPI.NewListInstancesOptions()) + if err != nil { + return nil, fmt.Errorf("failed to create instances pager: %w", err) + } + return pager.GetAllWithContext(localContext) +} + // ListCOSBuckets lists Buckets in the specified COS Instance. func (c *Client) ListCOSBuckets(ctx context.Context, cosInstanceID string, region string) (*ibms3.ListBucketsOutput, error) { localContext, cancel := context.WithTimeout(ctx, 1*time.Minute) diff --git a/pkg/asset/quota/ibmcloud/OWNERS b/pkg/asset/quota/ibmcloud/OWNERS new file mode 100644 index 00000000000..7a92d60f198 --- /dev/null +++ b/pkg/asset/quota/ibmcloud/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. + +approvers: + - ibmcloud-approvers +reviewers: + - ibmcloud-reviewers diff --git a/pkg/asset/quota/ibmcloud/ibmcloud.go b/pkg/asset/quota/ibmcloud/ibmcloud.go new file mode 100644 index 00000000000..5941bb2ea86 --- /dev/null +++ b/pkg/asset/quota/ibmcloud/ibmcloud.go @@ -0,0 +1,85 @@ +package ibmcloud + +import ( + "context" + "fmt" + + ibmcloudic "github.com/openshift/installer/pkg/asset/installconfig/ibmcloud" + "github.com/openshift/installer/pkg/quota" + "github.com/openshift/installer/pkg/types" +) + +// defaultLimits are IBM Cloud VPC default resource limits. +// Actual limits may vary by account. The check warns rather than +// blocks when it cannot determine the actual limit. +var defaultLimits = map[string]int64{ + "is/floating-ip": 20, + "is/security-group": 25, + "is/load-balancer": 50, + "is/instance": 200, + "is/vpc": 10, +} + +// Load fetches current IBM Cloud VPC resource usage for quota comparison. +// The security group count is scoped to the target VPC when deploying +// into an existing VPC, since the limit is per-VPC. +func Load(ctx context.Context, client *ibmcloudic.Client, config *types.InstallConfig) ([]quota.Quota, error) { + region := config.Platform.IBMCloud.Region + + // Resolve VPC ID for scoped security group counting. + var vpcID string + if config.Platform.IBMCloud.VPCName != "" { + vpc, err := client.GetVPCByName(ctx, config.Platform.IBMCloud.VPCName) + if err != nil { + return nil, fmt.Errorf("looking up VPC %q: %w", config.Platform.IBMCloud.VPCName, err) + } + if vpc != nil && vpc.ID != nil { + vpcID = *vpc.ID + } + } + + type counter struct { + name string + fn func() (int, error) + } + + counters := []counter{ + {"is/floating-ip", func() (int, error) { + fips, err := client.ListFloatingIPs(ctx, region) + return len(fips), err + }}, + {"is/security-group", func() (int, error) { + sgs, err := client.ListSecurityGroups(ctx, region, vpcID) + return len(sgs), err + }}, + {"is/load-balancer", func() (int, error) { + lbs, err := client.ListLoadBalancers(ctx, region) + return len(lbs), err + }}, + {"is/instance", func() (int, error) { + instances, err := client.ListInstances(ctx, region) + return len(instances), err + }}, + {"is/vpc", func() (int, error) { + vpcs, err := client.GetVPCs(ctx, region) + return len(vpcs), err + }}, + } + + quotas := make([]quota.Quota, 0, len(counters)) + for _, c := range counters { + count, err := c.fn() + if err != nil { + return nil, fmt.Errorf("counting %s: %w", c.name, err) + } + quotas = append(quotas, quota.Quota{ + Service: "is", + Name: c.name, + Region: region, + InUse: int64(count), + Limit: defaultLimits[c.name], + }) + } + + return quotas, nil +} diff --git a/pkg/asset/quota/quota.go b/pkg/asset/quota/quota.go index f6d220df906..abdec438402 100644 --- a/pkg/asset/quota/quota.go +++ b/pkg/asset/quota/quota.go @@ -12,16 +12,19 @@ import ( "github.com/openshift/installer/pkg/asset" "github.com/openshift/installer/pkg/asset/installconfig" awsconfig "github.com/openshift/installer/pkg/asset/installconfig/aws" + ibmcloudic "github.com/openshift/installer/pkg/asset/installconfig/ibmcloud" openstackvalidation "github.com/openshift/installer/pkg/asset/installconfig/openstack/validation" configpowervs "github.com/openshift/installer/pkg/asset/installconfig/powervs" "github.com/openshift/installer/pkg/asset/machines" "github.com/openshift/installer/pkg/asset/quota/aws" "github.com/openshift/installer/pkg/asset/quota/gcp" + ibmcloudquota "github.com/openshift/installer/pkg/asset/quota/ibmcloud" "github.com/openshift/installer/pkg/asset/quota/openstack" "github.com/openshift/installer/pkg/diagnostics" "github.com/openshift/installer/pkg/quota" quotaaws "github.com/openshift/installer/pkg/quota/aws" quotagcp "github.com/openshift/installer/pkg/quota/gcp" + ibmcloudconstraints "github.com/openshift/installer/pkg/quota/ibmcloud" typesaws "github.com/openshift/installer/pkg/types/aws" "github.com/openshift/installer/pkg/types/azure" "github.com/openshift/installer/pkg/types/baremetal" @@ -153,7 +156,23 @@ func (a *PlatformQuotaCheck) Generate(ctx context.Context, dependencies asset.Pa if err != nil { return errors.Wrap(err, "failed to create a new PISession") } - case azure.Name, baremetal.Name, ibmcloud.Name, external.Name, none.Name, ovirt.Name, vsphere.Name, nutanix.Name: + case ibmcloud.Name: + client, err := ibmcloudic.NewClient(ic.Config.Platform.IBMCloud.ServiceEndpoints) + if err != nil { + logrus.Warnf("Failed to create IBM Cloud client for quota check, skipping: %v", err) + return nil + } + q, err := ibmcloudquota.Load(ctx, client, ic.Config) + if err != nil { + logrus.Warnf("Failed to load IBM Cloud quotas, skipping: %v", err) + return nil + } + reports, err := quota.Check(q, ibmcloudconstraints.Constraints(ic.Config, masters, workers)) + if err != nil { + return summarizeFailingReport(reports) + } + summarizeReport(reports) + case azure.Name, baremetal.Name, external.Name, none.Name, ovirt.Name, vsphere.Name, nutanix.Name: // no special provisioning requirements to check default: err = fmt.Errorf("unknown platform type %q", platform) diff --git a/pkg/quota/ibmcloud/OWNERS b/pkg/quota/ibmcloud/OWNERS new file mode 100644 index 00000000000..7a92d60f198 --- /dev/null +++ b/pkg/quota/ibmcloud/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. + +approvers: + - ibmcloud-approvers +reviewers: + - ibmcloud-reviewers diff --git a/pkg/quota/ibmcloud/ibmcloud.go b/pkg/quota/ibmcloud/ibmcloud.go new file mode 100644 index 00000000000..22216373141 --- /dev/null +++ b/pkg/quota/ibmcloud/ibmcloud.go @@ -0,0 +1,97 @@ +package ibmcloud + +import ( + "sort" + + machineapi "github.com/openshift/api/machine/v1beta1" + "github.com/openshift/installer/pkg/quota" + "github.com/openshift/installer/pkg/types" +) + +// Constraints returns a list of quota constraints based on the InstallConfig. +// These constraints can be used to check if there is enough quota for creating +// a cluster for the install config. +func Constraints(config *types.InstallConfig, controlPlanes []machineapi.Machine, computes []machineapi.MachineSet) []quota.Constraint { + computeReplicas := make([]int64, len(computes)) + for i, w := range computes { + computeReplicas[i] = int64(*w.Spec.Replicas) + } + + var ret []quota.Constraint + for _, gen := range []constraintGenerator{ + network(config, len(controlPlanes)), + instances(config, len(controlPlanes), computeReplicas), + } { + ret = append(ret, gen()...) + } + return aggregate(ret) +} + +// constraintGenerator generates a list of constraints. +type constraintGenerator func() []quota.Constraint + +func network(config *types.InstallConfig, controlPlaneCount int) func() []quota.Constraint { + return func() []quota.Constraint { + region := config.Platform.IBMCloud.Region + + // Floating IPs: bootstrap + control plane nodes. + // The public API LB also uses floating IPs when publish is External. + fipCount := int64(1 + controlPlaneCount) + if config.Publish == types.ExternalPublishingStrategy || config.Publish == "" { + fipCount++ + } + + ret := []quota.Constraint{ + {Name: "is/floating-ip", Region: region, Count: fipCount}, + {Name: "is/security-group", Region: region, Count: 6}, + {Name: "is/load-balancer", Region: region, Count: 2}, + } + + if config.Platform.IBMCloud.VPCName == "" { + ret = append(ret, quota.Constraint{ + Name: "is/vpc", Region: region, Count: 1, + }) + } + + return ret + } +} + +func instances(config *types.InstallConfig, controlPlaneCount int, computeReplicas []int64) func() []quota.Constraint { + return func() []quota.Constraint { + region := config.Platform.IBMCloud.Region + + // control plane + bootstrap + count := int64(controlPlaneCount + 1) + for _, r := range computeReplicas { + count += r + } + + return []quota.Constraint{ + {Name: "is/instance", Region: region, Count: count}, + } + } +} + +func aggregate(constraints []quota.Constraint) []quota.Constraint { + if len(constraints) == 0 { + return nil + } + + sort.SliceStable(constraints, func(i, j int) bool { + return constraints[i].Name < constraints[j].Name + }) + + i := 0 + for j := 1; j < len(constraints); j++ { + if constraints[i].Name == constraints[j].Name && constraints[i].Region == constraints[j].Region { + constraints[i].Count += constraints[j].Count + } else { + i++ + if i != j { + constraints[i] = constraints[j] + } + } + } + return constraints[:i+1] +} diff --git a/pkg/quota/ibmcloud/ibmcloud_test.go b/pkg/quota/ibmcloud/ibmcloud_test.go new file mode 100644 index 00000000000..f3d944db442 --- /dev/null +++ b/pkg/quota/ibmcloud/ibmcloud_test.go @@ -0,0 +1,60 @@ +package ibmcloud + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/openshift/installer/pkg/quota" +) + +func Test_aggregate(t *testing.T) { + cases := []struct { + input []quota.Constraint + exp []quota.Constraint + }{{ + input: []quota.Constraint{ + {Name: "is/instance", Region: "us-east", Count: 3}, + {Name: "is/instance", Region: "us-east", Count: 1}, + }, + exp: []quota.Constraint{ + {Name: "is/instance", Region: "us-east", Count: 4}, + }, + }, { + input: []quota.Constraint{ + {Name: "is/floating-ip", Region: "us-east", Count: 5}, + {Name: "is/instance", Region: "us-east", Count: 3}, + {Name: "is/load-balancer", Region: "us-east", Count: 2}, + {Name: "is/instance", Region: "us-east", Count: 1}, + }, + exp: []quota.Constraint{ + {Name: "is/floating-ip", Region: "us-east", Count: 5}, + {Name: "is/instance", Region: "us-east", Count: 4}, + {Name: "is/load-balancer", Region: "us-east", Count: 2}, + }, + }, { + input: []quota.Constraint{ + {Name: "is/floating-ip", Region: "us-east", Count: 5}, + {Name: "is/instance", Region: "us-east", Count: 3}, + {Name: "is/load-balancer", Region: "us-east", Count: 2}, + {Name: "is/security-group", Region: "us-east", Count: 6}, + }, + exp: []quota.Constraint{ + {Name: "is/floating-ip", Region: "us-east", Count: 5}, + {Name: "is/instance", Region: "us-east", Count: 3}, + {Name: "is/load-balancer", Region: "us-east", Count: 2}, + {Name: "is/security-group", Region: "us-east", Count: 6}, + }, + }, { + input: nil, + exp: nil, + }} + + for idx, test := range cases { + t.Run(fmt.Sprintf("test %d", idx), func(t *testing.T) { + got := aggregate(test.input) + assert.EqualValues(t, test.exp, got) + }) + } +}