Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 26 additions & 24 deletions hypershift-operator/controllers/nodepool/capi.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,32 @@ func (c *CAPI) Reconcile(ctx context.Context) error {
}
}

// Reconcile spot-specific MachineHealthCheck when spot instances are enabled.
// This is placed before the autoRepair gate because the spot MHC must exist
// independently of autoRepair — it serves as a safety net for spot instance
// failures even when the ignition endpoint has not been reached yet.
spotMHC := c.spotMachineHealthCheck()
if isSpotEnabled(nodePool) {
if result, err := ctrl.CreateOrUpdate(ctx, c.Client, spotMHC, func() error {
return c.reconcileSpotMachineHealthCheck(ctx, spotMHC)
}); err != nil {
return fmt.Errorf("failed to reconcile spot MachineHealthCheck %q: %w",
client.ObjectKeyFromObject(spotMHC).String(), err)
} else {
log.Info("Reconciled spot MachineHealthCheck", "result", result)
}
} else {
err := c.Get(ctx, client.ObjectKeyFromObject(spotMHC), spotMHC)
if err != nil && !apierrors.IsNotFound(err) {
return err
}
if err == nil {
if err := c.Delete(ctx, spotMHC); err != nil && !apierrors.IsNotFound(err) {
return err
}
}
}

mhc := c.machineHealthCheck()
if nodePool.Spec.Management.AutoRepair {
if c := FindStatusCondition(nodePool.Status.Conditions, hyperv1.NodePoolReachedIgnitionEndpoint); c == nil || c.Status != corev1.ConditionTrue {
Expand Down Expand Up @@ -190,30 +216,6 @@ func (c *CAPI) Reconcile(ctx context.Context) error {
})
}

// Reconcile spot-specific MachineHealthCheck when spot instances are enabled
spotMHC := c.spotMachineHealthCheck()
if isSpotEnabled(nodePool) {
if result, err := ctrl.CreateOrUpdate(ctx, c.Client, spotMHC, func() error {
return c.reconcileSpotMachineHealthCheck(ctx, spotMHC)
}); err != nil {
return fmt.Errorf("failed to reconcile spot MachineHealthCheck %q: %w",
client.ObjectKeyFromObject(spotMHC).String(), err)
} else {
log.Info("Reconciled spot MachineHealthCheck", "result", result)
}
} else {
// Delete spot MHC if spot is not enabled
err := c.Get(ctx, client.ObjectKeyFromObject(spotMHC), spotMHC)
if err != nil && !apierrors.IsNotFound(err) {
return err
}
if err == nil {
if err := c.Delete(ctx, spotMHC); err != nil && !apierrors.IsNotFound(err) {
return err
}
}
}

return nil
}

Expand Down
230 changes: 230 additions & 0 deletions hypershift-operator/controllers/nodepool/capi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3279,3 +3279,233 @@ func TestNewCAPI(t *testing.T) {
})
}
}

func TestSpotMHCCreatedIndependentlyOfAutoRepairIgnitionGate(t *testing.T) {
t.Parallel()

maxUnavailable := intstr.FromInt(0)
maxSurge := intstr.FromInt(1)
awsMachineTemplateName := "test-nodepool-28d5cf5a"
capiClusterName := "infra-id"

tests := []struct {
name string
autoRepair bool
ignitionReached bool
spotEnabled bool
expectSpotMHC bool
expectRegularMHC bool
expectAutoRepairStatus corev1.ConditionStatus
}{
{
name: "When spot is enabled and autoRepair is true and ignition is NOT reached it should create spot MHC and not create regular MHC",
autoRepair: true,
ignitionReached: false,
spotEnabled: true,
expectSpotMHC: true,
expectRegularMHC: false,
expectAutoRepairStatus: "",
},
{
name: "When spot is enabled and autoRepair is true and ignition is reached it should create both MHCs",
autoRepair: true,
ignitionReached: true,
spotEnabled: true,
expectSpotMHC: true,
expectRegularMHC: true,
expectAutoRepairStatus: corev1.ConditionTrue,
},
{
name: "When spot is enabled and autoRepair is false it should create spot MHC and not create regular MHC",
autoRepair: false,
ignitionReached: false,
spotEnabled: true,
expectSpotMHC: true,
expectRegularMHC: false,
expectAutoRepairStatus: corev1.ConditionFalse,
},
{
name: "When spot is NOT enabled and autoRepair is true and ignition is NOT reached it should not create any MHC",
autoRepair: true,
ignitionReached: false,
spotEnabled: false,
expectSpotMHC: false,
expectRegularMHC: false,
expectAutoRepairStatus: "",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
g := NewWithT(t)

annotations := map[string]string{}
if tt.spotEnabled {
annotations[AnnotationEnableSpot] = "true"
}

var conditions []hyperv1.NodePoolCondition
if tt.ignitionReached {
conditions = append(conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolReachedIgnitionEndpoint,
Status: corev1.ConditionTrue,
})
} else {
conditions = append(conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolReachedIgnitionEndpoint,
Status: corev1.ConditionFalse,
})
}

nodePool := &hyperv1.NodePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-nodepool",
Namespace: "test-namespace",
Annotations: annotations,
},
Spec: hyperv1.NodePoolSpec{
ClusterName: "test-cluster",
Management: hyperv1.NodePoolManagement{
UpgradeType: hyperv1.UpgradeTypeReplace,
Replace: &hyperv1.ReplaceUpgrade{
Strategy: hyperv1.UpgradeStrategyRollingUpdate,
RollingUpdate: &hyperv1.RollingUpdate{
MaxUnavailable: &maxUnavailable,
MaxSurge: &maxSurge,
},
},
AutoRepair: tt.autoRepair,
},
Replicas: ptr.To[int32](1),
Platform: hyperv1.NodePoolPlatform{
Type: hyperv1.AWSPlatform,
AWS: &hyperv1.AWSNodePoolPlatform{
AMI: "an-ami",
},
},
},
Status: hyperv1.NodePoolStatus{
Conditions: conditions,
},
}

hostedCluster := &hyperv1.HostedCluster{
ObjectMeta: metav1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"},
Spec: hyperv1.HostedClusterSpec{
Platform: hyperv1.PlatformSpec{
Type: hyperv1.AWSPlatform,
AWS: &hyperv1.AWSPlatformSpec{
Region: "",
CloudProviderConfig: &hyperv1.AWSCloudProviderConfig{},
ServiceEndpoints: []hyperv1.AWSServiceEndpoint{},
RolesRef: hyperv1.AWSRolesRef{},
ResourceTags: []hyperv1.AWSResourceTag{},
EndpointAccess: "",
AdditionalAllowedPrincipals: []string{},
MultiArch: false,
},
},
},
}

machineSet := &capiv1.MachineSet{
ObjectMeta: metav1.ObjectMeta{
Name: "test-machineset",
Namespace: "test-namespace-test-cluster",
Annotations: map[string]string{
nodePoolAnnotation: "test-namespace/test-nodepool",
},
},
Spec: capiv1.MachineSetSpec{
Template: capiv1.MachineTemplateSpec{
Spec: capiv1.MachineSpec{
InfrastructureRef: corev1.ObjectReference{
Kind: "AWSMachineTemplate",
APIVersion: "infrastructure.cluster.x-k8s.io/v1beta2",
Namespace: "test-namespace-test-cluster",
Name: awsMachineTemplateName,
},
},
},
},
}

template := &capiaws.AWSMachineTemplate{
ObjectMeta: metav1.ObjectMeta{
Name: awsMachineTemplateName,
Namespace: "test-namespace-test-cluster",
Annotations: map[string]string{
nodePoolAnnotation: "test-namespace/test-nodepool",
},
},
}

controlpaneNamespace := manifests.HostedControlPlaneNamespace(hostedCluster.Namespace, hostedCluster.Name)
c := fake.NewClientBuilder().
WithScheme(api.Scheme).
WithObjects(nodePool, machineSet, template).
Build()

capi := &CAPI{
Token: &Token{
ConfigGenerator: &ConfigGenerator{
Client: c,
hostedCluster: hostedCluster,
nodePool: nodePool,
controlplaneNamespace: controlpaneNamespace,
rolloutConfig: &rolloutConfig{
releaseImage: &releaseinfo.ReleaseImage{
ImageStream: &imageapi.ImageStream{
ObjectMeta: metav1.ObjectMeta{
Name: "target-version",
},
},
},
},
},
cpoCapabilities: &CPOCapabilities{},
CreateOrUpdateProvider: upsert.New(false),
},
capiClusterName: capiClusterName,
ApplyProvider: upsert.NewApplyProvider(false),
}

err := capi.Reconcile(t.Context())
g.Expect(err).NotTo(HaveOccurred())

spotMHC := &capiv1.MachineHealthCheck{}
spotMHCErr := c.Get(t.Context(), client.ObjectKey{
Namespace: controlpaneNamespace,
Name: nodePool.GetName() + "-spot",
}, spotMHC)

if tt.expectSpotMHC {
g.Expect(spotMHCErr).NotTo(HaveOccurred(), "spot MHC should exist")
g.Expect(spotMHC.Spec.Selector.MatchLabels).To(HaveKeyWithValue(interruptibleInstanceLabel, ""))
g.Expect(spotMHC.Spec.MaxUnhealthy.String()).To(Equal("100%"))
} else {
g.Expect(apierrors.IsNotFound(spotMHCErr)).To(BeTrue(), "spot MHC should not exist")
}

regularMHC := &capiv1.MachineHealthCheck{}
regularMHCErr := c.Get(t.Context(), client.ObjectKey{
Namespace: controlpaneNamespace,
Name: nodePool.GetName(),
}, regularMHC)

if tt.expectRegularMHC {
g.Expect(regularMHCErr).NotTo(HaveOccurred(), "regular MHC should exist")
g.Expect(regularMHC.Spec.ClusterName).To(Equal(capiClusterName))
} else {
g.Expect(apierrors.IsNotFound(regularMHCErr)).To(BeTrue(), "regular MHC should not exist")
}

if tt.expectAutoRepairStatus != "" {
cond := FindStatusCondition(nodePool.Status.Conditions, hyperv1.NodePoolAutorepairEnabledConditionType)
g.Expect(cond).NotTo(BeNil())
g.Expect(cond.Status).To(Equal(tt.expectAutoRepairStatus))
}
})
}
}
Loading