diff --git a/api/hypershift/v1beta1/nodepool_types.go b/api/hypershift/v1beta1/nodepool_types.go
index dc17f42ee9e..5ebc5378064 100644
--- a/api/hypershift/v1beta1/nodepool_types.go
+++ b/api/hypershift/v1beta1/nodepool_types.go
@@ -106,7 +106,7 @@ type NodePool struct {
// +kubebuilder:validation:XValidation:rule="!has(self.replicas) || !has(self.autoScaling)", message="Both replicas or autoScaling should not be set"
// +kubebuilder:validation:XValidation:rule="self.arch != 's390x' || has(self.platform.kubevirt)", message="s390x is only supported on KubeVirt platform"
// +kubebuilder:validation:XValidation:rule="!has(self.platform.aws) || !has(self.platform.aws.imageType) || self.platform.aws.imageType != 'Windows' || self.arch == 'amd64'", message="ImageType 'Windows' requires arch 'amd64' (AWS only)"
-// +kubebuilder:validation:XValidation:rule="!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type == 'AWS'", message="Scale-from-zero (autoScaling.min=0) is currently only supported for AWS platform"
+// +kubebuilder:validation:XValidation:rule="!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type == 'AWS' || self.platform.type == 'Azure'", message="Scale-from-zero (autoScaling.min=0) is currently only supported for AWS and Azure platforms"
type NodePoolSpec struct {
// clusterName is the name of the HostedCluster this NodePool belongs to.
// If a HostedCluster with this name doesn't exist, the controller will no-op until it exists.
@@ -501,7 +501,7 @@ type NodePoolManagement struct {
// +kubebuilder:validation:XValidation:rule="self.max >= self.min", message="max must be equal or greater than min"
type NodePoolAutoScaling struct {
// min is the minimum number of nodes to maintain in the pool.
- // Can be set to 0 for scale-from-zero for AWS platform.
+ // Can be set to 0 for scale-from-zero for AWS and Azure platforms.
// Must be >= 0 and <= .Max.
//
// +kubebuilder:validation:Minimum=0
diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/AAA_ungated.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/AAA_ungated.yaml
index 9463ef0840c..28b0b76bc4a 100644
--- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/AAA_ungated.yaml
+++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/AAA_ungated.yaml
@@ -108,7 +108,7 @@ spec:
min:
description: |-
min is the minimum number of nodes to maintain in the pool.
- Can be set to 0 for scale-from-zero for AWS platform.
+ Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
format: int32
minimum: 0
@@ -1518,9 +1518,9 @@ spec:
rule: '!has(self.platform.aws) || !has(self.platform.aws.imageType)
|| self.platform.aws.imageType != ''Windows'' || self.arch == ''amd64'''
- message: Scale-from-zero (autoScaling.min=0) is currently only supported
- for AWS platform
+ for AWS and Azure platforms
rule: '!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type
- == ''AWS'''
+ == ''AWS'' || self.platform.type == ''Azure'''
status:
description: status is the latest observed status of the NodePool.
properties:
diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/GCPPlatform.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/GCPPlatform.yaml
index ea37b91c13a..2705760fb21 100644
--- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/GCPPlatform.yaml
+++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/GCPPlatform.yaml
@@ -108,7 +108,7 @@ spec:
min:
description: |-
min is the minimum number of nodes to maintain in the pool.
- Can be set to 0 for scale-from-zero for AWS platform.
+ Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
format: int32
minimum: 0
@@ -1787,9 +1787,9 @@ spec:
rule: '!has(self.platform.aws) || !has(self.platform.aws.imageType)
|| self.platform.aws.imageType != ''Windows'' || self.arch == ''amd64'''
- message: Scale-from-zero (autoScaling.min=0) is currently only supported
- for AWS platform
+ for AWS and Azure platforms
rule: '!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type
- == ''AWS'''
+ == ''AWS'' || self.platform.type == ''Azure'''
status:
description: status is the latest observed status of the NodePool.
properties:
diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/OpenStack.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/OpenStack.yaml
index baed37846b3..a7c77a9ab07 100644
--- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/OpenStack.yaml
+++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/nodepools.hypershift.openshift.io/OpenStack.yaml
@@ -108,7 +108,7 @@ spec:
min:
description: |-
min is the minimum number of nodes to maintain in the pool.
- Can be set to 0 for scale-from-zero for AWS platform.
+ Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
format: int32
minimum: 0
@@ -1705,9 +1705,9 @@ spec:
rule: '!has(self.platform.aws) || !has(self.platform.aws.imageType)
|| self.platform.aws.imageType != ''Windows'' || self.arch == ''amd64'''
- message: Scale-from-zero (autoScaling.min=0) is currently only supported
- for AWS platform
+ for AWS and Azure platforms
rule: '!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type
- == ''AWS'''
+ == ''AWS'' || self.platform.type == ''Azure'''
status:
description: status is the latest observed status of the NodePool.
properties:
diff --git a/cmd/install/assets/crds/hypershift-operator/tests/nodepools.hypershift.openshift.io/stable.nodepools.autoscaling.testsuite.yaml b/cmd/install/assets/crds/hypershift-operator/tests/nodepools.hypershift.openshift.io/stable.nodepools.autoscaling.testsuite.yaml
index 924499411ac..10ee8107523 100644
--- a/cmd/install/assets/crds/hypershift-operator/tests/nodepools.hypershift.openshift.io/stable.nodepools.autoscaling.testsuite.yaml
+++ b/cmd/install/assets/crds/hypershift-operator/tests/nodepools.hypershift.openshift.io/stable.nodepools.autoscaling.testsuite.yaml
@@ -31,7 +31,7 @@ tests:
id: "subnet-01234567"
type: AWS
- - name: when autoScaling min=0 on Azure platform it should fail
+ - name: when autoScaling min=0 on Azure platform it should pass
initial: |
apiVersion: hypershift.openshift.io/v1beta1
kind: NodePool
@@ -56,7 +56,6 @@ tests:
diskStorageAccountType: Premium_LRS
subnetID: "/subscriptions/12345678-1234-5678-9012-123456789012/resourceGroups/test-rg/providers/Microsoft.Network/virtualNetworks/test-vnet/subnets/test-subnet"
type: Azure
- expectedError: "Scale-from-zero (autoScaling.min=0) is currently only supported for AWS platform"
- name: when autoScaling min=0 on Agent platform it should fail
initial: |
@@ -77,7 +76,7 @@ tests:
agent: {}
type: Agent
- expectedError: "Scale-from-zero (autoScaling.min=0) is currently only supported for AWS platform"
+ expectedError: "Scale-from-zero (autoScaling.min=0) is currently only supported for AWS and Azure platforms"
- name: when autoScaling min=0 on KubeVirt platform it should fail
initial: |
@@ -101,7 +100,7 @@ tests:
persistent:
size: 32Gi
type: KubeVirt
- expectedError: "Scale-from-zero (autoScaling.min=0) is currently only supported for AWS platform"
+ expectedError: "Scale-from-zero (autoScaling.min=0) is currently only supported for AWS and Azure platforms"
- name: when autoScaling min=1 on Azure platform it should pass
initial: |
diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-CustomNoUpgrade.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-CustomNoUpgrade.crd.yaml
index 055967ac455..4e38e77b912 100644
--- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-CustomNoUpgrade.crd.yaml
+++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-CustomNoUpgrade.crd.yaml
@@ -111,7 +111,7 @@ spec:
min:
description: |-
min is the minimum number of nodes to maintain in the pool.
- Can be set to 0 for scale-from-zero for AWS platform.
+ Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
format: int32
minimum: 0
@@ -1975,9 +1975,9 @@ spec:
rule: '!has(self.platform.aws) || !has(self.platform.aws.imageType)
|| self.platform.aws.imageType != ''Windows'' || self.arch == ''amd64'''
- message: Scale-from-zero (autoScaling.min=0) is currently only supported
- for AWS platform
+ for AWS and Azure platforms
rule: '!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type
- == ''AWS'''
+ == ''AWS'' || self.platform.type == ''Azure'''
status:
description: status is the latest observed status of the NodePool.
properties:
diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-Default.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-Default.crd.yaml
index d317df1b41a..0df74130419 100644
--- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-Default.crd.yaml
+++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-Default.crd.yaml
@@ -111,7 +111,7 @@ spec:
min:
description: |-
min is the minimum number of nodes to maintain in the pool.
- Can be set to 0 for scale-from-zero for AWS platform.
+ Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
format: int32
minimum: 0
@@ -1521,9 +1521,9 @@ spec:
rule: '!has(self.platform.aws) || !has(self.platform.aws.imageType)
|| self.platform.aws.imageType != ''Windows'' || self.arch == ''amd64'''
- message: Scale-from-zero (autoScaling.min=0) is currently only supported
- for AWS platform
+ for AWS and Azure platforms
rule: '!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type
- == ''AWS'''
+ == ''AWS'' || self.platform.type == ''Azure'''
status:
description: status is the latest observed status of the NodePool.
properties:
diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-TechPreviewNoUpgrade.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-TechPreviewNoUpgrade.crd.yaml
index f8650900421..4680973aea3 100644
--- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-TechPreviewNoUpgrade.crd.yaml
+++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/nodepools-TechPreviewNoUpgrade.crd.yaml
@@ -111,7 +111,7 @@ spec:
min:
description: |-
min is the minimum number of nodes to maintain in the pool.
- Can be set to 0 for scale-from-zero for AWS platform.
+ Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
format: int32
minimum: 0
@@ -1975,9 +1975,9 @@ spec:
rule: '!has(self.platform.aws) || !has(self.platform.aws.imageType)
|| self.platform.aws.imageType != ''Windows'' || self.arch == ''amd64'''
- message: Scale-from-zero (autoScaling.min=0) is currently only supported
- for AWS platform
+ for AWS and Azure platforms
rule: '!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type
- == ''AWS'''
+ == ''AWS'' || self.platform.type == ''Azure'''
status:
description: status is the latest observed status of the NodePool.
properties:
diff --git a/cmd/install/install.go b/cmd/install/install.go
index c13fd743ec1..c2b0366ef96 100644
--- a/cmd/install/install.go
+++ b/cmd/install/install.go
@@ -291,18 +291,15 @@ func (o *Options) validateScaleFromZeroConfig() []error {
return nil
}
var errs []error
- supportedProviders := set.New("aws")
- // Check mutual exclusivity - only one of file or secret should be provided
+ supportedProviders := set.New("aws", "azure")
if len(o.ScaleFromZeroCreds) != 0 && len(o.ScaleFromZeroCredentialsSecret) != 0 {
errs = append(errs, fmt.Errorf("only one of --scale-from-zero-creds or --scale-from-zero-secret is supported"))
}
- // Provider is required when using scale-from-zero credentials
if len(o.ScaleFromZeroProvider) == 0 {
errs = append(errs, fmt.Errorf("--scale-from-zero-provider is required when using scale-from-zero credentials"))
} else if !supportedProviders.Has(o.ScaleFromZeroProvider) {
errs = append(errs, fmt.Errorf("invalid --scale-from-zero-provider: %s (must be one of: %v)", o.ScaleFromZeroProvider, supportedProviders.UnsortedList()))
}
- // Validate credentials file exists and is accessible if provided
if len(o.ScaleFromZeroCreds) > 0 {
if _, err := os.Stat(o.ScaleFromZeroCreds); err != nil {
if os.IsNotExist(err) {
@@ -435,7 +432,7 @@ func NewCommand() *cobra.Command {
cmd.PersistentFlags().StringSliceVar(&opts.PlatformsToInstall, "limit-crd-install", opts.PlatformsToInstall, "Used to limit the CRDs that are installed to a per platform basis (example: --limit-crd-install=AWS,Azure). If this flag is not specified, all CRDs for all platforms will be installed. Valid, case-insensitive values are: AWS, Azure, IBMCloud, KubeVirt, Agent, OpenStack, GCP.")
cmd.PersistentFlags().StringToStringVar(&opts.AdditionalOperatorEnvVars, "additional-operator-env-vars", opts.AdditionalOperatorEnvVars, "Set of additional environment variables to be set on the HyperShift Operator deployment.")
cmd.PersistentFlags().BoolVar(&opts.EnableAuditLogPersistence, "enable-audit-log-persistence", opts.EnableAuditLogPersistence, "If true, enables persistent audit logs with automatic snapshots for kube-apiserver pods")
- cmd.PersistentFlags().StringVar(&opts.ScaleFromZeroProvider, "scale-from-zero-provider", opts.ScaleFromZeroProvider, "Platform type for scale-from-zero autoscaling (aws)")
+ cmd.PersistentFlags().StringVar(&opts.ScaleFromZeroProvider, "scale-from-zero-provider", opts.ScaleFromZeroProvider, "Platform type for scale-from-zero autoscaling (aws, azure)")
cmd.PersistentFlags().StringVar(&opts.ScaleFromZeroCreds, "scale-from-zero-creds", opts.ScaleFromZeroCreds, "Path to credentials file for scale-from-zero instance type queries")
cmd.PersistentFlags().StringVar(&opts.ScaleFromZeroCredentialsSecret, "scale-from-zero-secret", opts.ScaleFromZeroCredentialsSecret, "Name of existing secret containing scale-from-zero credentials (alternative to --scale-from-zero-creds)")
cmd.PersistentFlags().StringVar(&opts.ScaleFromZeroCredentialsSecretKey, "scale-from-zero-secret-key", opts.ScaleFromZeroCredentialsSecretKey, "Key within the scale-from-zero credentials secret (default: credentials)")
diff --git a/docs/content/reference/aggregated-docs.md b/docs/content/reference/aggregated-docs.md
index e1e9658ac27..34e7f5fb53c 100644
--- a/docs/content/reference/aggregated-docs.md
+++ b/docs/content/reference/aggregated-docs.md
@@ -48365,7 +48365,7 @@ int32
min is the minimum number of nodes to maintain in the pool.
-Can be set to 0 for scale-from-zero for AWS platform.
+Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
|
diff --git a/docs/content/reference/api.md b/docs/content/reference/api.md
index 828e8531ff1..91284eac765 100644
--- a/docs/content/reference/api.md
+++ b/docs/content/reference/api.md
@@ -12680,7 +12680,7 @@ int32
min is the minimum number of nodes to maintain in the pool.
-Can be set to 0 for scale-from-zero for AWS platform.
+Can be set to 0 for scale-from-zero for AWS and Azure platforms.
Must be >= 0 and <= .Max.
|
diff --git a/go.mod b/go.mod
index 536ea13a88a..493a7fd6ecb 100644
--- a/go.mod
+++ b/go.mod
@@ -6,6 +6,7 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.1
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2 v2.2.0
+ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/dns/armdns v1.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v5 v5.2.0
@@ -137,7 +138,6 @@ require (
cloud.google.com/go/compute/metadata v0.9.0 // indirect
cyphar.com/go-pathrs v0.2.1 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.12.0 // indirect
- github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 // indirect
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
diff --git a/hypershift-operator/controllers/nodepool/capi.go b/hypershift-operator/controllers/nodepool/capi.go
index 248c7fc468b..907f4ab82d0 100644
--- a/hypershift-operator/controllers/nodepool/capi.go
+++ b/hypershift-operator/controllers/nodepool/capi.go
@@ -51,7 +51,8 @@ const (
// and let nodepool, hostedcluster, and client be fields of CAPI / interface methods.
type CAPI struct {
*Token
- capiClusterName string
+ capiClusterName string
+ scaleFromZeroPlatform hyperv1.PlatformType
upsert.ApplyProvider
}
@@ -472,7 +473,7 @@ func (c *CAPI) reconcileMachineDeployment(ctx context.Context, log logr.Logger,
}
}
- setMachineDeploymentReplicas(nodePool, machineDeployment)
+ setMachineDeploymentReplicas(nodePool, machineDeployment, c.scaleFromZeroPlatform)
if updated := c.propagateVersionAndTemplate(log, machineDeployment, machineTemplateCR); updated {
return nil
@@ -756,7 +757,7 @@ func (c *CAPI) reconcileMachineHealthCheck(ctx context.Context,
// setMachineDeploymentReplicas sets wanted replicas:
// If autoscaling is enabled we reconcile min/max annotations and leave replicas untouched.
-func setMachineDeploymentReplicas(nodePool *hyperv1.NodePool, machineDeployment *capiv1.MachineDeployment) {
+func setMachineDeploymentReplicas(nodePool *hyperv1.NodePool, machineDeployment *capiv1.MachineDeployment, scaleFromZeroPlatform hyperv1.PlatformType) {
if machineDeployment.Annotations == nil {
machineDeployment.Annotations = make(map[string]string)
}
@@ -773,7 +774,7 @@ func setMachineDeploymentReplicas(nodePool *hyperv1.NodePool, machineDeployment
// NodePools from being permanently stuck at 0 replicas on platforms that don't support
// scale-from-zero metadata.
effectiveMin := ptr.Deref(nodePool.Spec.AutoScaling.Min, 0)
- if effectiveMin == 0 && nodePool.Spec.Platform.Type != hyperv1.AWSPlatform {
+ if effectiveMin == 0 && nodePool.Spec.Platform.Type != hyperv1.AWSPlatform && nodePool.Spec.Platform.Type != scaleFromZeroPlatform {
effectiveMin = 1
}
@@ -957,7 +958,7 @@ func (c *CAPI) reconcileMachineSet(ctx context.Context,
}
machineSet.Spec.Template.Annotations[nodePoolAnnotationTaints] = taintsInJSON
- setMachineSetReplicas(nodePool, machineSet)
+ setMachineSetReplicas(nodePool, machineSet, c.scaleFromZeroPlatform)
isUpdating := false
// Propagate version and userData Secret to the MachineSet.
@@ -1064,7 +1065,7 @@ func machineSetInPlaceRolloutIsComplete(machineSet *capiv1.MachineSet) bool {
// setMachineSetReplicas sets wanted replicas:
// If autoscaling is enabled we reconcile min/max annotations and leave replicas untouched.
-func setMachineSetReplicas(nodePool *hyperv1.NodePool, machineSet *capiv1.MachineSet) {
+func setMachineSetReplicas(nodePool *hyperv1.NodePool, machineSet *capiv1.MachineSet, scaleFromZeroPlatform hyperv1.PlatformType) {
if machineSet.Annotations == nil {
machineSet.Annotations = make(map[string]string)
}
@@ -1081,7 +1082,7 @@ func setMachineSetReplicas(nodePool *hyperv1.NodePool, machineSet *capiv1.Machin
// NodePools from being permanently stuck at 0 replicas on platforms that don't support
// scale-from-zero metadata.
effectiveMin := ptr.Deref(nodePool.Spec.AutoScaling.Min, 0)
- if effectiveMin == 0 && nodePool.Spec.Platform.Type != hyperv1.AWSPlatform {
+ if effectiveMin == 0 && nodePool.Spec.Platform.Type != hyperv1.AWSPlatform && nodePool.Spec.Platform.Type != scaleFromZeroPlatform {
effectiveMin = 1
}
diff --git a/hypershift-operator/controllers/nodepool/capi_test.go b/hypershift-operator/controllers/nodepool/capi_test.go
index 1fe35dc09f6..4250b5025c8 100644
--- a/hypershift-operator/controllers/nodepool/capi_test.go
+++ b/hypershift-operator/controllers/nodepool/capi_test.go
@@ -42,6 +42,7 @@ func TestSetMachineSetReplicas(t *testing.T) {
name string
nodePool *hyperv1.NodePool
machineSet *capiv1.MachineSet
+ scaleFromZeroPlatform hyperv1.PlatformType
expectReplicas int32
expectAutoscalerAnnotations map[string]string
}{
@@ -178,7 +179,7 @@ func TestSetMachineSetReplicas(t *testing.T) {
},
},
{
- name: "it enforces min=1 for Azure platform even when NodePool specifies min=0",
+ name: "it allows min=0 for Azure platform (scale-from-zero)",
nodePool: &hyperv1.NodePool{
ObjectMeta: metav1.ObjectMeta{},
Spec: hyperv1.NodePoolSpec{
@@ -199,9 +200,10 @@ func TestSetMachineSetReplicas(t *testing.T) {
Replicas: nil,
},
},
- expectReplicas: 1,
+ scaleFromZeroPlatform: hyperv1.AzurePlatform,
+ expectReplicas: 0,
expectAutoscalerAnnotations: map[string]string{
- autoscalerMinAnnotation: "1",
+ autoscalerMinAnnotation: "0",
autoscalerMaxAnnotation: "5",
},
},
@@ -266,7 +268,7 @@ func TestSetMachineSetReplicas(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)
- setMachineSetReplicas(tc.nodePool, tc.machineSet)
+ setMachineSetReplicas(tc.nodePool, tc.machineSet, tc.scaleFromZeroPlatform)
g.Expect(*tc.machineSet.Spec.Replicas).To(Equal(tc.expectReplicas))
g.Expect(tc.machineSet.Annotations).To(Equal(tc.expectAutoscalerAnnotations))
})
@@ -279,6 +281,7 @@ func TestSetMachineDeploymentReplicas(t *testing.T) {
name string
nodePool *hyperv1.NodePool
machineDeployment *capiv1.MachineDeployment
+ scaleFromZeroPlatform hyperv1.PlatformType
expectReplicas int32
expectAutoscalerAnnotations map[string]string
}{
@@ -504,7 +507,7 @@ func TestSetMachineDeploymentReplicas(t *testing.T) {
},
},
{
- name: "it enforces min=1 for Azure platform even when NodePool specifies min=0",
+ name: "it allows min=0 for Azure platform (scale-from-zero)",
nodePool: &hyperv1.NodePool{
ObjectMeta: metav1.ObjectMeta{},
Spec: hyperv1.NodePoolSpec{
@@ -525,9 +528,10 @@ func TestSetMachineDeploymentReplicas(t *testing.T) {
Replicas: nil,
},
},
- expectReplicas: 1,
+ scaleFromZeroPlatform: hyperv1.AzurePlatform,
+ expectReplicas: 0,
expectAutoscalerAnnotations: map[string]string{
- autoscalerMinAnnotation: "1",
+ autoscalerMinAnnotation: "0",
autoscalerMaxAnnotation: "5",
},
},
@@ -592,7 +596,7 @@ func TestSetMachineDeploymentReplicas(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)
- setMachineDeploymentReplicas(tc.nodePool, tc.machineDeployment)
+ setMachineDeploymentReplicas(tc.nodePool, tc.machineDeployment, tc.scaleFromZeroPlatform)
g.Expect(*tc.machineDeployment.Spec.Replicas).To(Equal(tc.expectReplicas))
g.Expect(tc.machineDeployment.Annotations).To(Equal(tc.expectAutoscalerAnnotations))
})
diff --git a/hypershift-operator/controllers/nodepool/conditions.go b/hypershift-operator/controllers/nodepool/conditions.go
index cff009e3590..7b5d710620c 100644
--- a/hypershift-operator/controllers/nodepool/conditions.go
+++ b/hypershift-operator/controllers/nodepool/conditions.go
@@ -181,9 +181,7 @@ func (r *NodePoolReconciler) autoscalerEnabledCondition(_ context.Context, nodeP
// Check platform-specific support
var supported bool
switch nodePool.Spec.Platform.Type {
- case hyperv1.AWSPlatform:
- // AWS supports scale-from-zero either natively (when CPO supports it)
- // or via MachineDeployment controller workaround annotations
+ case hyperv1.AWSPlatform, hyperv1.AzurePlatform:
supported = true
default:
// Other platforms don't support autoscaling from zero yet
diff --git a/hypershift-operator/controllers/nodepool/instancetype/azure/provider.go b/hypershift-operator/controllers/nodepool/instancetype/azure/provider.go
new file mode 100644
index 00000000000..57dd7a499ba
--- /dev/null
+++ b/hypershift-operator/controllers/nodepool/instancetype/azure/provider.go
@@ -0,0 +1,168 @@
+package azure
+
+import (
+ "context"
+ "fmt"
+ "math"
+ "strconv"
+ "strings"
+ "sync"
+
+ hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
+ "github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/instancetype"
+
+ azruntime "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
+)
+
+// ResourceSKUsAPI defines the operations used from armcompute.ResourceSKUsClient.
+// Only the paged list call is included, so any value providing that one method
+// (the real client or a stub) can be handed to NewProvider.
+type ResourceSKUsAPI interface {
+ // NewListPager returns a pager over Resource SKUs; options carries the optional list filter.
+ NewListPager(options *armcompute.ResourceSKUsClientListOptions) *azruntime.Pager[armcompute.ResourceSKUsClientListResponse]
+}
+
+// Compile-time check that the real client satisfies our interface.
+var _ ResourceSKUsAPI = (*armcompute.ResourceSKUsClient)(nil)
+
+// Provider implements the instancetype.Provider interface for Azure.
+// It queries the Azure Resource SKUs API to get VM size specifications.
+// The SKU list is fetched on the first GetInstanceTypeInfo call and afterwards
+// served from an in-memory cache.
+type Provider struct {
+ // skuClient lists Resource SKUs.
+ skuClient ResourceSKUsAPI
+ // location is the value placed in the "location eq" filter of the list call.
+ location string
+ // cache maps a VM size name to its parsed specification; it stays nil until a load succeeds.
+ cache map[string]*instancetype.InstanceTypeInfo
+ // mu is held for the whole of GetInstanceTypeInfo, including the lazy cache load.
+ mu sync.Mutex
+}
+
+// NewProvider returns a Provider that lists SKUs through skuClient, filtered to
+// location. The SKU cache is left unset; it is filled on the first lookup.
+func NewProvider(skuClient ResourceSKUsAPI, location string) *Provider {
+	provider := new(Provider)
+	provider.skuClient = skuClient
+	provider.location = location
+	return provider
+}
+
+// GetInstanceTypeInfo returns the specification of the VM size named instanceType.
+//
+// The first call lists the Resource SKUs for the provider's location and caches
+// the result; later calls are answered from that cache. If listing fails the
+// cache stays unset, so the next call tries again. The returned value is a copy,
+// so callers may modify it without affecting the cached entry.
+func (p *Provider) GetInstanceTypeInfo(ctx context.Context, instanceType string) (*instancetype.InstanceTypeInfo, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.cache == nil {
+		loadErr := p.loadSKUs(ctx)
+		if loadErr != nil {
+			return nil, fmt.Errorf("failed to load Azure Resource SKUs: %w", loadErr)
+		}
+	}
+
+	if cached, found := p.cache[instanceType]; found {
+		result := *cached
+		return &result, nil
+	}
+
+	return nil, fmt.Errorf("VM size %q not found in Azure Resource SKUs for location %q", instanceType, p.location)
+}
+
+// loadSKUs lists every Resource SKU for p.location and rebuilds the VM size cache.
+//
+// Only entries whose resource type is "virtualMachines" (compared
+// case-insensitively) are considered. Entries that transformSKU rejects are
+// skipped instead of failing the whole load, so a single malformed or
+// unsupported SKU cannot hide all the others; such a size is subsequently
+// reported as not found by GetInstanceTypeInfo. p.cache is replaced only after
+// every page has been read, so a failed listing leaves its previous value
+// untouched. This method does not lock; GetInstanceTypeInfo calls it with p.mu held.
+func (p *Provider) loadSKUs(ctx context.Context) error {
+	nextCache := make(map[string]*instancetype.InstanceTypeInfo)
+
+	filter := fmt.Sprintf("location eq '%s'", p.location)
+	pager := p.skuClient.NewListPager(&armcompute.ResourceSKUsClientListOptions{
+		Filter: &filter,
+	})
+
+	for pager.More() {
+		page, err := pager.NextPage(ctx)
+		if err != nil {
+			return fmt.Errorf("failed to list Azure Resource SKUs: %w", err)
+		}
+
+		for _, sku := range page.Value {
+			// List items are pointers; skip a nil entry before touching its fields.
+			if sku == nil || sku.ResourceType == nil {
+				continue
+			}
+			if !strings.EqualFold(*sku.ResourceType, "virtualMachines") {
+				continue
+			}
+
+			info, err := transformSKU(sku)
+			if err != nil {
+				// Deliberate best effort: an unusable SKU is left out of the cache
+				// rather than aborting the load for every other VM size.
+				continue
+			}
+			nextCache[info.InstanceType] = info
+		}
+	}
+
+	p.cache = nextCache
+	return nil
+}
+
+// transformSKU converts one Azure Resource SKU into an InstanceTypeInfo.
+//
+// The SKU must have a non-empty name and the capabilities "vCPUs" (positive
+// integer), "MemoryGB" (positive, finite number; stored as MiB rounded to the
+// nearest integer) and "CpuArchitectureType" ("x64" or "arm64", compared
+// case-insensitively). "GPUs" is optional and must be a non-negative integer
+// when present. An error is returned, and no partial result, when sku is nil
+// or any of these checks fails.
+func transformSKU(sku *armcompute.ResourceSKU) (*instancetype.InstanceTypeInfo, error) {
+	if sku == nil {
+		return nil, fmt.Errorf("SKU is nil")
+	}
+	if sku.Name == nil || *sku.Name == "" {
+		return nil, fmt.Errorf("SKU name is missing")
+	}
+
+	name := *sku.Name
+	info := &instancetype.InstanceTypeInfo{
+		InstanceType: name,
+	}
+
+	vcpuStr, ok := getCapabilityValue(sku.Capabilities, "vCPUs")
+	if !ok {
+		return nil, fmt.Errorf("missing vCPUs capability for VM size %q", name)
+	}
+	vcpu, err := strconv.ParseInt(vcpuStr, 10, 32)
+	if err != nil {
+		return nil, fmt.Errorf("invalid vCPUs value %q for VM size %q: %w", vcpuStr, name, err)
+	}
+	if vcpu <= 0 {
+		return nil, fmt.Errorf("invalid vCPUs count %d for VM size %q", vcpu, name)
+	}
+	info.VCPU = int32(vcpu)
+
+	memStr, ok := getCapabilityValue(sku.Capabilities, "MemoryGB")
+	if !ok {
+		return nil, fmt.Errorf("missing MemoryGB capability for VM size %q", name)
+	}
+	memGB, err := strconv.ParseFloat(memStr, 64)
+	if err != nil {
+		return nil, fmt.Errorf("invalid MemoryGB value %q for VM size %q: %w", memStr, name, err)
+	}
+	memMb := math.Round(memGB * 1024)
+	// ParseFloat accepts "NaN" without error and NaN is not <= 0, so reject it
+	// explicitly; also reject values too large for int64, since converting an
+	// out-of-range float to an integer has no defined result.
+	if math.IsNaN(memGB) || memGB <= 0 || memMb >= math.MaxInt64 {
+		return nil, fmt.Errorf("invalid MemoryGB value %v for VM size %q", memGB, name)
+	}
+	info.MemoryMb = int64(memMb)
+
+	gpuStr, ok := getCapabilityValue(sku.Capabilities, "GPUs")
+	if ok {
+		gpu, err := strconv.ParseInt(gpuStr, 10, 32)
+		if err != nil {
+			return nil, fmt.Errorf("invalid GPUs value %q for VM size %q: %w", gpuStr, name, err)
+		}
+		if gpu < 0 {
+			return nil, fmt.Errorf("negative GPUs count %d for VM size %q", gpu, name)
+		}
+		info.GPU = int32(gpu)
+	}
+
+	archStr, ok := getCapabilityValue(sku.Capabilities, "CpuArchitectureType")
+	if !ok {
+		return nil, fmt.Errorf("missing CpuArchitectureType capability for VM size %q", name)
+	}
+	switch strings.ToLower(archStr) {
+	case "x64":
+		info.CPUArchitecture = hyperv1.ArchitectureAMD64
+	case "arm64":
+		info.CPUArchitecture = hyperv1.ArchitectureARM64
+	default:
+		return nil, fmt.Errorf("unsupported CPU architecture %q for VM size %q", archStr, name)
+	}
+
+	return info, nil
+}
+
+// getCapabilityValue looks up the capability called name (exact, case-sensitive
+// match) and returns its value and true. It returns "", false when no capability
+// has that name, or when the first capability with that name carries no value.
+// Nil entries in capabilities are skipped.
+func getCapabilityValue(capabilities []*armcompute.ResourceSKUCapabilities, name string) (string, bool) {
+	for _, capability := range capabilities {
+		// Guard the element itself as well as its Name: both are pointers.
+		if capability == nil || capability.Name == nil || *capability.Name != name {
+			continue
+		}
+		if capability.Value == nil {
+			return "", false
+		}
+		return *capability.Value, true
+	}
+	return "", false
+}
diff --git a/hypershift-operator/controllers/nodepool/instancetype/azure/provider_test.go b/hypershift-operator/controllers/nodepool/instancetype/azure/provider_test.go
new file mode 100644
index 00000000000..04a6dfafe9a
--- /dev/null
+++ b/hypershift-operator/controllers/nodepool/instancetype/azure/provider_test.go
@@ -0,0 +1,424 @@
+package azure
+
+import (
+ "context"
+ "fmt"
+ "testing"
+
+ . "github.com/onsi/gomega"
+
+ hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
+ "github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/instancetype"
+
+ azruntime "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
+ "github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
+)
+
+// mockResourceSKUsAPI is a test double for the Resource SKUs list API: its
+// NewListPager method serves a fixed list of SKUs, or a fixed error.
+type mockResourceSKUsAPI struct {
+ // skus is returned as the single page of results when err is nil.
+ skus []*armcompute.ResourceSKU
+ // err, when non-nil, is returned by the pager's fetch instead of skus.
+ err error
+}
+
+func (m *mockResourceSKUsAPI) NewListPager(_ *armcompute.ResourceSKUsClientListOptions) *azruntime.Pager[armcompute.ResourceSKUsClientListResponse] {
+ return azruntime.NewPager(azruntime.PagingHandler[armcompute.ResourceSKUsClientListResponse]{
+ More: func(page armcompute.ResourceSKUsClientListResponse) bool {
+ return false
+ },
+ Fetcher: func(ctx context.Context, page *armcompute.ResourceSKUsClientListResponse) (armcompute.ResourceSKUsClientListResponse, error) {
+ if m.err != nil {
+ return armcompute.ResourceSKUsClientListResponse{}, m.err
+ }
+ return armcompute.ResourceSKUsClientListResponse{
+ ResourceSKUsResult: armcompute.ResourceSKUsResult{
+ Value: m.skus,
+ },
+ }, nil
+ },
+ })
+}
+
+func makeSKU(name, resourceType string, capabilities map[string]string) *armcompute.ResourceSKU {
+ sku := &armcompute.ResourceSKU{
+ Name: to.Ptr(name),
+ ResourceType: to.Ptr(resourceType),
+ }
+ for k, v := range capabilities {
+ sku.Capabilities = append(sku.Capabilities, &armcompute.ResourceSKUCapabilities{
+ Name: to.Ptr(k),
+ Value: to.Ptr(v),
+ })
+ }
+ return sku
+}
+
+func TestTransformSKU_WhenValidInput_ItShouldTransformCorrectly(t *testing.T) {
+ tests := []struct {
+ name string
+ input *armcompute.ResourceSKU
+ expected *instancetype.InstanceTypeInfo
+ }{
+ {
+ name: "When Standard_D4s_v3 with x64 arch it should transform correctly",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4",
+ "MemoryGB": "16",
+ "CpuArchitectureType": "x64",
+ }),
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_D4s_v3",
+ VCPU: 4,
+ MemoryMb: 16384,
+ GPU: 0,
+ CPUArchitecture: hyperv1.ArchitectureAMD64,
+ },
+ },
+ {
+ name: "When GPU VM it should set GPU count",
+ input: makeSKU("Standard_NC16as_T4_v3", "virtualMachines", map[string]string{
+ "vCPUs": "16",
+ "MemoryGB": "110",
+ "GPUs": "1",
+ "CpuArchitectureType": "x64",
+ }),
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_NC16as_T4_v3",
+ VCPU: 16,
+ MemoryMb: 112640,
+ GPU: 1,
+ CPUArchitecture: hyperv1.ArchitectureAMD64,
+ },
+ },
+ {
+ name: "When Arm64 VM it should set correct architecture",
+ input: makeSKU("Standard_D4ps_v5", "virtualMachines", map[string]string{
+ "vCPUs": "4",
+ "MemoryGB": "16",
+ "CpuArchitectureType": "Arm64",
+ }),
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_D4ps_v5",
+ VCPU: 4,
+ MemoryMb: 16384,
+ GPU: 0,
+ CPUArchitecture: hyperv1.ArchitectureARM64,
+ },
+ },
+ {
+ name: "When GPUs capability is absent it should default to 0",
+ input: makeSKU("Standard_B2s", "virtualMachines", map[string]string{
+ "vCPUs": "2",
+ "MemoryGB": "4",
+ "CpuArchitectureType": "x64",
+ }),
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_B2s",
+ VCPU: 2,
+ MemoryMb: 4096,
+ GPU: 0,
+ CPUArchitecture: hyperv1.ArchitectureAMD64,
+ },
+ },
+ {
+ name: "When MemoryGB is fractional it should convert correctly",
+ input: makeSKU("Standard_B1ls", "virtualMachines", map[string]string{
+ "vCPUs": "1",
+ "MemoryGB": "0.5",
+ "CpuArchitectureType": "x64",
+ }),
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_B1ls",
+ VCPU: 1,
+ MemoryMb: 512,
+ GPU: 0,
+ CPUArchitecture: hyperv1.ArchitectureAMD64,
+ },
+ },
+ {
+ name: "When MemoryGB is large it should convert correctly",
+ input: makeSKU("Standard_M416ms_v2", "virtualMachines", map[string]string{
+ "vCPUs": "416",
+ "MemoryGB": "11400",
+ "CpuArchitectureType": "x64",
+ }),
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_M416ms_v2",
+ VCPU: 416,
+ MemoryMb: 11673600,
+ GPU: 0,
+ CPUArchitecture: hyperv1.ArchitectureAMD64,
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ g := NewGomegaWithT(t)
+ result, err := transformSKU(tt.input)
+ g.Expect(err).ToNot(HaveOccurred())
+ g.Expect(result).To(Equal(tt.expected))
+ })
+ }
+}
+
+func TestTransformSKU_WhenMissingRequiredFields_ItShouldReturnError(t *testing.T) {
+ tests := []struct {
+ name string
+ input *armcompute.ResourceSKU
+ expectedError string
+ }{
+ {
+ name: "When SKU name is nil it should return error",
+ input: &armcompute.ResourceSKU{
+ Name: nil,
+ ResourceType: to.Ptr("virtualMachines"),
+ Capabilities: []*armcompute.ResourceSKUCapabilities{
+ {Name: to.Ptr("vCPUs"), Value: to.Ptr("4")},
+ },
+ },
+ expectedError: "SKU name is missing",
+ },
+ {
+ name: "When vCPUs capability is missing it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "MemoryGB": "16",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "missing vCPUs capability",
+ },
+ {
+ name: "When MemoryGB capability is missing it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "missing MemoryGB capability",
+ },
+ {
+ name: "When CpuArchitectureType capability is missing it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4",
+ "MemoryGB": "16",
+ }),
+ expectedError: "missing CpuArchitectureType capability",
+ },
+ {
+ name: "When vCPUs value is not a valid integer it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "abc",
+ "MemoryGB": "16",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "invalid vCPUs value",
+ },
+ {
+ name: "When MemoryGB value is not a valid float it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4",
+ "MemoryGB": "xyz",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "invalid MemoryGB value",
+ },
+ {
+ name: "When vCPUs value is zero it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "0",
+ "MemoryGB": "16",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "invalid vCPUs count",
+ },
+ {
+ name: "When MemoryGB value is zero it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4",
+ "MemoryGB": "0",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "invalid MemoryGB value",
+ },
+ {
+ name: "When CpuArchitectureType is unsupported it should return error",
+ input: makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4",
+ "MemoryGB": "16",
+ "CpuArchitectureType": "i386",
+ }),
+ expectedError: "unsupported CPU architecture",
+ },
+ {
+ name: "When GPUs value is not a valid integer it should return error",
+ input: makeSKU("Standard_NC6", "virtualMachines", map[string]string{
+ "vCPUs": "6",
+ "MemoryGB": "56",
+ "GPUs": "abc",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "invalid GPUs value",
+ },
+ {
+ name: "When GPUs value is negative it should return error",
+ input: makeSKU("Standard_NC6", "virtualMachines", map[string]string{
+ "vCPUs": "6",
+ "MemoryGB": "56",
+ "GPUs": "-1",
+ "CpuArchitectureType": "x64",
+ }),
+ expectedError: "negative GPUs count",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ g := NewGomegaWithT(t)
+ _, err := transformSKU(tt.input)
+ g.Expect(err).To(HaveOccurred())
+ g.Expect(err.Error()).To(ContainSubstring(tt.expectedError))
+ })
+ }
+}
+
+func TestGetInstanceTypeInfo(t *testing.T) {
+ tests := []struct {
+ name string
+ skus []*armcompute.ResourceSKU
+ apiErr error
+ instanceType string
+ expected *instancetype.InstanceTypeInfo
+ expectedError string
+ }{
+ {
+ name: "When VM size exists it should return info",
+ skus: []*armcompute.ResourceSKU{
+ makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4", "MemoryGB": "16", "CpuArchitectureType": "x64",
+ }),
+ },
+ instanceType: "Standard_D4s_v3",
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_D4s_v3",
+ VCPU: 4,
+ MemoryMb: 16384,
+ GPU: 0,
+ CPUArchitecture: hyperv1.ArchitectureAMD64,
+ },
+ },
+ {
+ name: "When VM size not found it should return error",
+ skus: []*armcompute.ResourceSKU{
+ makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4", "MemoryGB": "16", "CpuArchitectureType": "x64",
+ }),
+ },
+ instanceType: "Standard_Nonexistent",
+ expectedError: "not found",
+ },
+ {
+ name: "When API returns error it should propagate error",
+ apiErr: fmt.Errorf("API error: throttling"),
+ instanceType: "Standard_D4s_v3",
+ expectedError: "failed to load Azure Resource SKUs",
+ },
+ {
+ name: "When SKU has matching name but wrong ResourceType it should return not found",
+ skus: []*armcompute.ResourceSKU{
+ makeSKU("Standard_D4s_v3", "disks", map[string]string{
+ "vCPUs": "4", "MemoryGB": "16", "CpuArchitectureType": "x64",
+ }),
+ },
+ instanceType: "Standard_D4s_v3",
+ expectedError: "not found",
+ },
+ {
+ name: "When multiple SKUs returned it should match only virtualMachines type",
+ skus: []*armcompute.ResourceSKU{
+ makeSKU("Standard_D4s_v3", "disks", map[string]string{
+ "vCPUs": "99", "MemoryGB": "99", "CpuArchitectureType": "x64",
+ }),
+ makeSKU("Standard_D4s_v3", "virtualMachines", map[string]string{
+ "vCPUs": "4", "MemoryGB": "16", "CpuArchitectureType": "x64",
+ }),
+ },
+ instanceType: "Standard_D4s_v3",
+ expected: &instancetype.InstanceTypeInfo{
+ InstanceType: "Standard_D4s_v3",
+ VCPU: 4,
+ MemoryMb: 16384,
+ GPU: 0,
+ CPUArchitecture: hyperv1.ArchitectureAMD64,
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ g := NewGomegaWithT(t)
+ mock := &mockResourceSKUsAPI{skus: tt.skus, err: tt.apiErr}
+ provider := NewProvider(mock, "eastus")
+ result, err := provider.GetInstanceTypeInfo(context.Background(), tt.instanceType)
+
+ if tt.expectedError != "" {
+ g.Expect(err).To(HaveOccurred())
+ g.Expect(err.Error()).To(ContainSubstring(tt.expectedError))
+ } else {
+ g.Expect(err).ToNot(HaveOccurred())
+ g.Expect(result).To(Equal(tt.expected))
+ }
+ })
+ }
+}
+
+// TestGetCapabilityValue verifies getCapabilityValue's lookup contract: exact
+// (case-sensitive) name matching, "" and false when nothing matches or the
+// list is nil, entries without a name being skipped, and the first name match
+// deciding the result even when its value is nil.
+func TestGetCapabilityValue(t *testing.T) {
+	tests := []struct {
+		name         string
+		capabilities []*armcompute.ResourceSKUCapabilities
+		capName      string
+		expectedVal  string
+		expectedOK   bool
+	}{
+		{
+			name: "When capability exists it should return the value",
+			capabilities: []*armcompute.ResourceSKUCapabilities{
+				{Name: to.Ptr("vCPUs"), Value: to.Ptr("4")},
+			},
+			capName:     "vCPUs",
+			expectedVal: "4",
+			expectedOK:  true,
+		},
+		{
+			name: "When capability does not exist it should return empty and false",
+			capabilities: []*armcompute.ResourceSKUCapabilities{
+				{Name: to.Ptr("vCPUs"), Value: to.Ptr("4")},
+			},
+			capName:     "GPUs",
+			expectedVal: "",
+			expectedOK:  false,
+		},
+		{
+			name:         "When capabilities slice is nil it should return empty and false",
+			capabilities: nil,
+			capName:      "vCPUs",
+			expectedVal:  "",
+			expectedOK:   false,
+		},
+		{
+			name: "When capability name has different case it should not match",
+			capabilities: []*armcompute.ResourceSKUCapabilities{
+				{Name: to.Ptr("vcpus"), Value: to.Ptr("4")},
+			},
+			capName:     "vCPUs",
+			expectedVal: "",
+			expectedOK:  false,
+		},
+		{
+			// Exercises the nil-Value branch of the lookup.
+			name: "When capability value is nil it should return empty and false",
+			capabilities: []*armcompute.ResourceSKUCapabilities{
+				{Name: to.Ptr("vCPUs"), Value: nil},
+			},
+			capName:     "vCPUs",
+			expectedVal: "",
+			expectedOK:  false,
+		},
+		{
+			// Exercises the nil-Name guard: the unnamed entry must not match
+			// and must not stop the search.
+			name: "When an entry has a nil name it should be skipped",
+			capabilities: []*armcompute.ResourceSKUCapabilities{
+				{Name: nil, Value: to.Ptr("8")},
+				{Name: to.Ptr("vCPUs"), Value: to.Ptr("4")},
+			},
+			capName:     "vCPUs",
+			expectedVal: "4",
+			expectedOK:  true,
+		},
+		{
+			// Pins first-match semantics: a later duplicate is not consulted.
+			name: "When the first matching entry has a nil value it should not fall through to later entries",
+			capabilities: []*armcompute.ResourceSKUCapabilities{
+				{Name: to.Ptr("vCPUs"), Value: nil},
+				{Name: to.Ptr("vCPUs"), Value: to.Ptr("4")},
+			},
+			capName:     "vCPUs",
+			expectedVal: "",
+			expectedOK:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewGomegaWithT(t)
+			val, ok := getCapabilityValue(tt.capabilities, tt.capName)
+			g.Expect(ok).To(Equal(tt.expectedOK))
+			g.Expect(val).To(Equal(tt.expectedVal))
+		})
+	}
+}
diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller.go b/hypershift-operator/controllers/nodepool/nodepool_controller.go
index 8a550972e4a..2bfca1845ef 100644
--- a/hypershift-operator/controllers/nodepool/nodepool_controller.go
+++ b/hypershift-operator/controllers/nodepool/nodepool_controller.go
@@ -104,6 +104,7 @@ type NodePoolReconciler struct {
KubevirtInfraClients kvinfra.KubevirtInfraClientMap
EC2Client awsapi.EC2API
InstanceTypeProvider instancetype.Provider
+ ScaleFromZeroPlatform hyperv1.PlatformType
}
type NotReadyError struct {
@@ -385,6 +386,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
if err != nil {
return ctrl.Result{}, err
}
+ capi.scaleFromZeroPlatform = r.ScaleFromZeroPlatform
if isPaused, duration := supportutil.IsReconciliationPaused(log, nodePool.Spec.PausedUntil); isPaused {
if err := capi.Pause(ctx); err != nil {
return ctrl.Result{}, fmt.Errorf("error pausing CAPI: %w", err)
@@ -426,7 +428,7 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
// Set scale-from-zero annotations if provider is configured and platform is supported
// This works for both Replace (MachineDeployment) and InPlace (MachineSet) upgrade types
- if isAutoscalingEnabled(nodePool) && r.InstanceTypeProvider != nil && supportedScaleFromZeroPlatform(nodePool.Spec.Platform.Type) {
+ if isAutoscalingEnabled(nodePool) && r.InstanceTypeProvider != nil && r.ScaleFromZeroPlatform == nodePool.Spec.Platform.Type {
if err = r.reconcileScaleFromZeroAnnotations(ctx, nodePool, capi); err != nil {
log.Error(err, "Failed to set scale-from-zero annotations, will retry")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
@@ -436,11 +438,6 @@ func (r *NodePoolReconciler) reconcile(ctx context.Context, hcluster *hyperv1.Ho
return ctrl.Result{}, nil
}
-// supportedScaleFromZeroPlatform checks if the platform supports scale-from-zero functionality.
-func supportedScaleFromZeroPlatform(platform hyperv1.PlatformType) bool {
- return platform == hyperv1.AWSPlatform
-}
-
func (r *NodePoolReconciler) token(ctx context.Context, hcluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool) (*Token, error) {
// Validate and get releaseImage.
releaseImage, err := r.getReleaseImage(ctx, hcluster, nodePool.Status.Version, nodePool.Spec.Release.Image)
@@ -1260,16 +1257,15 @@ func (r *NodePoolReconciler) reconcileScaleFromZeroAnnotations(ctx context.Conte
}
machineTemplate = awsMachineTemplate
- // Future platform support can be added here:
- // case hyperv1.AzurePlatform:
- // azureTemplate := &capiazure.AzureMachineTemplate{}
- // if err := capi.getExistingMachineTemplate(ctx, azureTemplate); err != nil {
- // if apierrors.IsNotFound(err) {
- // return nil
- // }
- // return fmt.Errorf("failed to get AzureMachineTemplate: %w", err)
- // }
- // machineTemplate = azureTemplate
+ case hyperv1.AzurePlatform:
+ azureTemplate := &capiazure.AzureMachineTemplate{}
+ if err := capi.getExistingMachineTemplate(ctx, azureTemplate); err != nil {
+ if apierrors.IsNotFound(err) {
+ return nil
+ }
+ return fmt.Errorf("failed to get AzureMachineTemplate: %w", err)
+ }
+ machineTemplate = azureTemplate
default:
return fmt.Errorf("unsupported platform for scale-from-zero: %s", nodePool.Spec.Platform.Type)
diff --git a/hypershift-operator/controllers/nodepool/scale_from_zero.go b/hypershift-operator/controllers/nodepool/scale_from_zero.go
index 5d24335bc19..b67c2a4ca65 100644
--- a/hypershift-operator/controllers/nodepool/scale_from_zero.go
+++ b/hypershift-operator/controllers/nodepool/scale_from_zero.go
@@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
+ capiazure "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
@@ -80,10 +81,8 @@ func setScaleFromZeroAnnotationsOnObject(ctx context.Context, provider instancet
case *infrav1.AWSMachineTemplate:
instanceType = template.Spec.Template.Spec.InstanceType
statusCapacity = template.Status.Capacity
- // Future platform support can be added here:
- // case *capiazure.AzureMachineTemplate:
- // instanceType = template.Spec.Template.Spec.VMSize
- // statusCapacity = template.Status.Capacity
+ case *capiazure.AzureMachineTemplate:
+ instanceType = template.Spec.Template.Spec.VMSize
default:
return fmt.Errorf("unsupported machine template type: %T", machineTemplate)
}
diff --git a/hypershift-operator/controllers/nodepool/scale_from_zero_test.go b/hypershift-operator/controllers/nodepool/scale_from_zero_test.go
index bec100f788b..ae16ff36060 100644
--- a/hypershift-operator/controllers/nodepool/scale_from_zero_test.go
+++ b/hypershift-operator/controllers/nodepool/scale_from_zero_test.go
@@ -15,6 +15,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
+ capiazure "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
capiv1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
)
@@ -90,6 +91,16 @@ func TestSetScaleFromZeroAnnotationsOnObject(t *testing.T) {
}
}
+ newAzureTemplate := func(vmSize string) *capiazure.AzureMachineTemplate {
+ return &capiazure.AzureMachineTemplate{
+ Spec: capiazure.AzureMachineTemplateSpec{
+ Template: capiazure.AzureMachineTemplateResource{
+ Spec: capiazure.AzureMachineSpec{VMSize: vmSize},
+ },
+ },
+ }
+ }
+
tests := []struct {
name string
provider instancetype.Provider
@@ -200,6 +211,66 @@ func TestSetScaleFromZeroAnnotationsOnObject(t *testing.T) {
g.Expect(a).ToNot(HaveKey(taintsKey))
},
},
+ {
+ name: "When Azure template with valid VMSize and no GPU it should set basic annotations",
+ provider: &mockProvider{info: &instancetype.InstanceTypeInfo{
+ VCPU: 4, MemoryMb: 16384, GPU: 0, CPUArchitecture: "amd64",
+ }},
+ nodePool: &hyperv1.NodePool{},
+ object: &capiv1.MachineDeployment{},
+ machineTemplate: newAzureTemplate("Standard_D4s_v3"),
+ expectErr: false,
+ validate: func(g Gomega, md *capiv1.MachineDeployment) {
+ a := md.GetAnnotations()
+ g.Expect(a).To(HaveKeyWithValue(cpuKey, "4"))
+ g.Expect(a).To(HaveKeyWithValue(memoryKey, "16384"))
+ g.Expect(a).To(HaveKeyWithValue(labelsKey, "kubernetes.io/arch=amd64"))
+ g.Expect(a).ToNot(HaveKey(gpuKey))
+ },
+ },
+ {
+ name: "When Azure template with empty VMSize it should return error",
+ provider: &mockProvider{},
+ nodePool: &hyperv1.NodePool{},
+ object: &capiv1.MachineDeployment{},
+ machineTemplate: newAzureTemplate(""),
+ expectErr: true,
+ errSubstring: "instanceType is empty",
+ },
+ {
+ name: "When Azure template with nil provider it should skip annotations",
+ provider: nil,
+ nodePool: &hyperv1.NodePool{},
+ object: &capiv1.MachineDeployment{},
+ machineTemplate: newAzureTemplate("Standard_D4s_v3"),
+ expectErr: false,
+ validate: func(g Gomega, md *capiv1.MachineDeployment) {
+ g.Expect(md.GetAnnotations()).ToNot(HaveKey(cpuKey))
+ },
+ },
+ {
+ name: "When Azure template with GPU and taints it should set all annotations",
+ provider: &mockProvider{info: &instancetype.InstanceTypeInfo{
+ VCPU: 6, MemoryMb: 114688, GPU: 1, CPUArchitecture: "amd64",
+ }},
+ nodePool: &hyperv1.NodePool{
+ Spec: hyperv1.NodePoolSpec{
+ Taints: []hyperv1.Taint{
+ {Key: "dedicated", Value: "gpu", Effect: corev1.TaintEffectNoSchedule},
+ },
+ },
+ },
+ object: &capiv1.MachineDeployment{},
+ machineTemplate: newAzureTemplate("Standard_NC6s_v3"),
+ expectErr: false,
+ validate: func(g Gomega, md *capiv1.MachineDeployment) {
+ a := md.GetAnnotations()
+ g.Expect(a).To(HaveKeyWithValue(cpuKey, "6"))
+ g.Expect(a).To(HaveKeyWithValue(memoryKey, "114688"))
+ g.Expect(a).To(HaveKeyWithValue(gpuKey, "1"))
+ g.Expect(a).To(HaveKeyWithValue(taintsKey, "dedicated=gpu:NoSchedule"))
+ },
+ },
{
name: "When instance has GPU, labels with arch override, taints, and existing annotations it should set all correctly",
provider: &mockProvider{info: &instancetype.InstanceTypeInfo{
diff --git a/hypershift-operator/main.go b/hypershift-operator/main.go
index a663242bf0f..680084874e4 100644
--- a/hypershift-operator/main.go
+++ b/hypershift-operator/main.go
@@ -17,6 +17,7 @@ package main
import (
"context"
"crypto/tls"
+ "encoding/json"
"fmt"
"os"
"strings"
@@ -35,6 +36,7 @@ import (
"github.com/openshift/hypershift/hypershift-operator/controllers/nodepool"
"github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/instancetype"
awsinstancetype "github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/instancetype/aws"
+ azureinstancetype "github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/instancetype/azure"
npmetrics "github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/metrics"
"github.com/openshift/hypershift/hypershift-operator/controllers/platform/aws"
azureplatform "github.com/openshift/hypershift/hypershift-operator/controllers/platform/azure"
@@ -71,6 +73,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v5"
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
@@ -351,7 +354,7 @@ func validateStartOptions(opts *StartOptions, log logr.Logger) error {
return fmt.Errorf("--etcd-backup-max-count must be at least 1, got %d", opts.EtcdBackupMaxCount)
}
- supportedProviders := set.New("aws")
+ supportedProviders := set.New("aws", "azure")
if opts.ScaleFromZeroCreds != "" {
if opts.ScaleFromZeroProvider == "" {
return fmt.Errorf("--scale-from-zero-provider is required when using --scale-from-zero-creds")
@@ -596,6 +599,8 @@ func setupEC2Client(ctx context.Context, opts *StartOptions) awsapi.EC2API {
func setupNodePoolController(ctx context.Context, mgr ctrl.Manager, opts *StartOptions, operatorImage string, createOrUpdate upsert.CreateOrUpdateProvider, registryProvider globalconfig.CommonRegistryProvider, ec2Client awsapi.EC2API, log logr.Logger) error {
var instanceTypeProvider instancetype.Provider
+ var scaleFromZeroPlatform hyperv1.PlatformType
+
if opts.ScaleFromZeroCreds != "" && opts.ScaleFromZeroProvider != "" {
switch strings.ToLower(opts.ScaleFromZeroProvider) {
case "aws":
@@ -605,7 +610,65 @@ func setupNodePoolController(ctx context.Context, mgr ctrl.Manager, opts *StartO
o.Retryer = awsConfig()
})
instanceTypeProvider = awsinstancetype.NewProvider(scaleFromZeroEC2Client)
+ scaleFromZeroPlatform = hyperv1.AWSPlatform
log.Info("Instance type provider initialized", "provider", opts.ScaleFromZeroProvider)
+ case "azure":
+ raw, err := os.ReadFile(opts.ScaleFromZeroCreds)
+ if err != nil {
+ return fmt.Errorf("failed to read Azure scale-from-zero credentials: %w", err)
+ }
+ var azureCreds struct {
+ SubscriptionID string `json:"subscriptionId"`
+ ClientID string `json:"clientId"`
+ ClientSecret string `json:"clientSecret"`
+ TenantID string `json:"tenantId"`
+ Location string `json:"location"`
+ }
+ if err := json.Unmarshal(raw, &azureCreds); err != nil {
+ return fmt.Errorf("failed to parse Azure scale-from-zero credentials: %w", err)
+ }
+ var missing []string
+ if azureCreds.SubscriptionID == "" {
+ missing = append(missing, "subscriptionId")
+ }
+ if azureCreds.ClientID == "" {
+ missing = append(missing, "clientId")
+ }
+ if azureCreds.ClientSecret == "" {
+ missing = append(missing, "clientSecret")
+ }
+ if azureCreds.TenantID == "" {
+ missing = append(missing, "tenantId")
+ }
+ if azureCreds.Location == "" {
+ missing = append(missing, "location")
+ }
+ if len(missing) > 0 {
+ return fmt.Errorf("azure scale-from-zero credentials missing required fields: %s", strings.Join(missing, ", "))
+ }
+ azureCloudName := os.Getenv("AZURE_CLOUD_NAME")
+ if azureCloudName == "" {
+ azureCloudName = config.DefaultAzureCloud
+ }
+ cloudConfig, err := azureutil.GetAzureCloudConfiguration(azureCloudName)
+ if err != nil {
+ return fmt.Errorf("failed to get Azure cloud configuration for scale-from-zero: %w", err)
+ }
+ cred, err := azidentity.NewClientSecretCredential(azureCreds.TenantID, azureCreds.ClientID, azureCreds.ClientSecret,
+ &azidentity.ClientSecretCredentialOptions{
+ ClientOptions: azcore.ClientOptions{Cloud: cloudConfig},
+ },
+ )
+ if err != nil {
+ return fmt.Errorf("failed to create Azure credentials for scale-from-zero: %w", err)
+ }
+ skuClient, err := armcompute.NewResourceSKUsClient(azureCreds.SubscriptionID, cred, azureutil.NewARMClientOptions(cloudConfig))
+ if err != nil {
+ return fmt.Errorf("failed to create Azure ResourceSKUs client: %w", err)
+ }
+ instanceTypeProvider = azureinstancetype.NewProvider(skuClient, azureCreds.Location)
+ scaleFromZeroPlatform = hyperv1.AzurePlatform
+ log.Info("Instance type provider initialized", "provider", opts.ScaleFromZeroProvider, "location", azureCreds.Location)
default:
log.Info("WARNING: Unsupported scale-from-zero provider", "provider", opts.ScaleFromZeroProvider)
}
@@ -620,6 +683,7 @@ func setupNodePoolController(ctx context.Context, mgr ctrl.Manager, opts *StartO
KubevirtInfraClients: kvinfra.NewKubevirtInfraClientMap(),
EC2Client: ec2Client,
InstanceTypeProvider: instanceTypeProvider,
+ ScaleFromZeroPlatform: scaleFromZeroPlatform,
}).SetupWithManager(mgr); err != nil {
return fmt.Errorf("unable to create controller: %w", err)
}
diff --git a/test/e2e/autoscaling_test.go b/test/e2e/autoscaling_test.go
index 62f5c07374b..270af8cd991 100644
--- a/test/e2e/autoscaling_test.go
+++ b/test/e2e/autoscaling_test.go
@@ -675,8 +675,8 @@ func testAutoscalerRespectsNodePoolPause(ctx context.Context, mgtClient crclient
}
func TestNodePoolAutoscalingScaleFromZero(t *testing.T) {
- if globalOpts.Platform != hyperv1.AWSPlatform {
- t.Skip("test only supported on platform AWS")
+ if globalOpts.Platform != hyperv1.AWSPlatform && globalOpts.Platform != hyperv1.AzurePlatform {
+ t.Skip("test only supported on AWS and Azure platforms")
}
// Get management client to check for scale-from-zero secret
diff --git a/test/e2e/nodepool_test.go b/test/e2e/nodepool_test.go
index 6e40796a160..f72452ab793 100644
--- a/test/e2e/nodepool_test.go
+++ b/test/e2e/nodepool_test.go
@@ -12,10 +12,12 @@ import (
hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
"github.com/openshift/hypershift/support/conditions"
e2eutil "github.com/openshift/hypershift/test/e2e/util"
+
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
+
crclient "sigs.k8s.io/controller-runtime/pkg/client"
. "github.com/onsi/gomega"
diff --git a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go
index dc17f42ee9e..5ebc5378064 100644
--- a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go
+++ b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/nodepool_types.go
@@ -106,7 +106,7 @@ type NodePool struct {
// +kubebuilder:validation:XValidation:rule="!has(self.replicas) || !has(self.autoScaling)", message="Both replicas or autoScaling should not be set"
// +kubebuilder:validation:XValidation:rule="self.arch != 's390x' || has(self.platform.kubevirt)", message="s390x is only supported on KubeVirt platform"
// +kubebuilder:validation:XValidation:rule="!has(self.platform.aws) || !has(self.platform.aws.imageType) || self.platform.aws.imageType != 'Windows' || self.arch == 'amd64'", message="ImageType 'Windows' requires arch 'amd64' (AWS only)"
-// +kubebuilder:validation:XValidation:rule="!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type == 'AWS'", message="Scale-from-zero (autoScaling.min=0) is currently only supported for AWS platform"
+// +kubebuilder:validation:XValidation:rule="!has(self.autoScaling) || self.autoScaling.min > 0 || self.platform.type == 'AWS' || self.platform.type == 'Azure'", message="Scale-from-zero (autoScaling.min=0) is currently only supported for AWS and Azure platforms"
type NodePoolSpec struct {
// clusterName is the name of the HostedCluster this NodePool belongs to.
// If a HostedCluster with this name doesn't exist, the controller will no-op until it exists.
@@ -501,7 +501,7 @@ type NodePoolManagement struct {
// +kubebuilder:validation:XValidation:rule="self.max >= self.min", message="max must be equal or greater than min"
type NodePoolAutoScaling struct {
// min is the minimum number of nodes to maintain in the pool.
- // Can be set to 0 for scale-from-zero for AWS platform.
+ // Can be set to 0 for scale-from-zero for AWS and Azure platforms.
// Must be >= 0 and <= .Max.
//
// +kubebuilder:validation:Minimum=0