Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ import (

v1alpha1 "github.com/osac-project/bare-metal-fulfillment-operator/api/v1alpha1"
"github.com/osac-project/host-management-openstack/internal/controller"
"github.com/osac-project/host-management-openstack/internal/ironic"
"github.com/osac-project/host-management-openstack/internal/management"
"github.com/osac-project/osac-operator/pkg/aap"
"github.com/osac-project/osac-operator/pkg/provisioning"
// +kubebuilder:scaffold:imports
Expand Down Expand Up @@ -233,15 +233,17 @@ func main() {
os.Exit(1)
}

// Ironic client for bare metal management
var ironicClient *ironic.Client
ironicCtx, ironicCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer ironicCancel()
if ironicClient, err = ironic.NewClient(ironicCtx); err != nil {
setupLog.Error(err, "failed to create Ironic client")
mgmtCtx, mgmtCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer mgmtCancel()
managementClient, err := management.NewOpenStackClient(mgmtCtx, &management.Config{
Type: "openstack",
})
if err != nil {
setupLog.V(1).Info("management client creation failed", "error", err)
setupLog.Error(nil, "failed to create management client (check cloud credentials and endpoint configuration)")
os.Exit(1)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
setupLog.Info("Connect to ironic", "endpoint", ironicClient.GetEndpoint())
setupLog.Info("Management client created", "type", "openstack")

// AAP provisioning provider for image provisioning workflows
var provisioningProvider provisioning.ProvisioningProvider
Expand Down Expand Up @@ -272,7 +274,7 @@ func main() {
hostLeaseReconciler := controller.NewHostLeaseReconciler(
mgr.GetClient(),
mgr.GetScheme(),
ironicClient,
managementClient,
provisioningProvider,
0, // Use DefaultRecheckInterval
)
Expand Down
94 changes: 55 additions & 39 deletions internal/controller/hostlease_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"time"

"github.com/go-logr/logr"
"github.com/gophercloud/gophercloud/v2/openstack/baremetal/v1/nodes"
"k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -37,30 +36,24 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"

v1alpha1 "github.com/osac-project/bare-metal-fulfillment-operator/api/v1alpha1"
"github.com/osac-project/host-management-openstack/internal/ironic"
"github.com/osac-project/host-management-openstack/internal/management"
opv1alpha1 "github.com/osac-project/osac-operator/api/v1alpha1"
"github.com/osac-project/osac-operator/pkg/provisioning"
)

// HostLeaseReconciler reconciles HostLease CRs for power management via Ironic.
type HostLeaseReconciler struct {
client.Client
Scheme *runtime.Scheme
IronicClient ironic.NodeClient
ProvisioningProvider provisioning.ProvisioningProvider

// RecheckInterval is the interval for polling Ironic until power state matches desired state.
RecheckInterval time.Duration

// ProvisionPollInterval is the interval for polling AAP job status.
Scheme *runtime.Scheme
ManagementClient management.Client
ProvisioningProvider provisioning.ProvisioningProvider
RecheckInterval time.Duration
ProvisionPollInterval time.Duration
}

// NewHostLeaseReconciler creates a new HostLeaseReconciler with defaults applied.
func NewHostLeaseReconciler(
client client.Client,
scheme *runtime.Scheme,
ironicClient ironic.NodeClient,
managementClient management.Client,
provider provisioning.ProvisioningProvider,
recheckInterval time.Duration,
) *HostLeaseReconciler {
Expand All @@ -71,7 +64,7 @@ func NewHostLeaseReconciler(
return &HostLeaseReconciler{
Client: client,
Scheme: scheme,
IronicClient: ironicClient,
ManagementClient: managementClient,
ProvisioningProvider: provider,
RecheckInterval: recheckInterval,
ProvisionPollInterval: DefaultProvisionPollInterval,
Expand Down Expand Up @@ -143,33 +136,44 @@ func (r *HostLeaseReconciler) handleUpdate(ctx context.Context, hostLease *v1alp
}
}

node, err := r.IronicClient.GetNode(ctx, hostLease.Spec.ExternalHostID)
powerStatus, err := r.ManagementClient.GetPowerState(ctx, hostLease.Spec.ExternalHostID)
if err != nil {
log.Error(err, "failed to get Ironic node", "nodeID", hostLease.Spec.ExternalHostID)
log.Error(err, "failed to get power state", "nodeID", hostLease.Spec.ExternalHostID)
r.syncHostLeaseStatus(hostLease, nil, err, log)
return ctrl.Result{}, err
}
if powerStatus == nil {
err := fmt.Errorf("management backend returned nil power status for host %s", hostLease.Spec.ExternalHostID)
log.Error(err, "unexpected nil power status", "nodeID", hostLease.Spec.ExternalHostID)
r.syncHostLeaseStatus(hostLease, nil, err, log)
return ctrl.Result{}, err
}
log.V(1).Info("Ironic node", "nodeID", hostLease.Spec.ExternalHostID, "power_state", node.PowerState)
log.V(1).Info("Host power state", "nodeID", hostLease.Spec.ExternalHostID, "power_state", powerStatus.State)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if hostLease.Spec.PoweredOn != nil {
if err := r.reconcilePower(ctx, hostLease, node, log); err != nil {
if err := r.reconcilePower(ctx, hostLease, powerStatus, log); err != nil {
r.syncHostLeaseStatus(hostLease, nil, err, log)
return ctrl.Result{}, err
}

node, err = r.IronicClient.GetNode(ctx, hostLease.Spec.ExternalHostID)
powerStatus, err = r.ManagementClient.GetPowerState(ctx, hostLease.Spec.ExternalHostID)
if err != nil {
log.Error(err, "failed to refresh node after power reconciliation", "nodeID", hostLease.Spec.ExternalHostID)
log.Error(err, "failed to refresh power state after reconciliation", "nodeID", hostLease.Spec.ExternalHostID)
r.syncHostLeaseStatus(hostLease, nil, err, log)
return ctrl.Result{}, err
}
if powerStatus == nil {
err := fmt.Errorf("management backend returned nil power status for host %s", hostLease.Spec.ExternalHostID)
log.Error(err, "unexpected nil power status after reconciliation", "nodeID", hostLease.Spec.ExternalHostID)
r.syncHostLeaseStatus(hostLease, nil, err, log)
return ctrl.Result{}, err
}
}

r.syncHostLeaseStatus(hostLease, node, nil, log)
r.syncHostLeaseStatus(hostLease, powerStatus, nil, log)

if hostLease.Spec.PoweredOn != nil {
currentlyOn := node.PowerState == ironic.PowerOn.String()
if *hostLease.Spec.PoweredOn != currentlyOn {
if powerStatus.IsTransitioning || *hostLease.Spec.PoweredOn != (powerStatus.State == management.PowerOn) {
hostLease.Status.Phase = v1alpha1.HostLeasePhaseProgressing
return ctrl.Result{RequeueAfter: r.RecheckInterval}, nil
}
Expand Down Expand Up @@ -303,38 +307,41 @@ func (r *HostLeaseReconciler) validateOpenStackHost(hostLease *v1alpha1.HostLeas
return true
}

func (r *HostLeaseReconciler) reconcilePower(ctx context.Context, hostLease *v1alpha1.HostLease, node *nodes.Node, log logr.Logger) error {
currentlyOn := node.PowerState == ironic.PowerOn.String()
func (r *HostLeaseReconciler) reconcilePower(ctx context.Context, hostLease *v1alpha1.HostLease, powerStatus *management.PowerStatus, log logr.Logger) error {
currentlyOn := powerStatus.State == management.PowerOn
desiredOn := *hostLease.Spec.PoweredOn

// If Ironic is already processing a power state change, skip to avoid 409 Conflict.
if r.IronicClient.IsNodePowerTransitioning(node) {
if powerStatus.IsTransitioning {
log.V(1).Info("Node is transitioning, skipping power action",
"nodeID", hostLease.Spec.ExternalHostID,
"targetPowerState", node.TargetPowerState)
"nodeID", hostLease.Spec.ExternalHostID)
return nil
}

var err error
needsPowerUpdate := desiredOn != currentlyOn
if !needsPowerUpdate {
log.Info("Power state already matches desired", "poweredOn", desiredOn, "power_state", node.PowerState)
log.Info("Power state already matches desired", "poweredOn", desiredOn, "power_state", powerStatus.State)
return nil
}

targetState := ironic.PowerOff
targetState := management.PowerOff
action := "off"
if desiredOn {
targetState = ironic.PowerOn
targetState = management.PowerOn
action = "on"
}

log.Info("Powering "+action+" node", "nodeID", hostLease.Spec.ExternalHostID)
if err = r.IronicClient.SetPowerState(ctx, hostLease.Spec.ExternalHostID, targetState); err != nil {
if err := r.ManagementClient.SetPowerState(ctx, hostLease.Spec.ExternalHostID, targetState); err != nil {
if errors.Is(err, management.ErrTransitioning) {
log.Info("Node is transitioning (conflict), will retry",
"nodeID", hostLease.Spec.ExternalHostID)
return nil
}
log.Error(err, "failed to power "+action+" node", "nodeID", hostLease.Spec.ExternalHostID)
return err
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

return err
return nil
}

func (r *HostLeaseReconciler) reconcileProvisioning(ctx context.Context, hostLease *v1alpha1.HostLease) (ctrl.Result, error) {
Expand Down Expand Up @@ -393,8 +400,7 @@ func (r *HostLeaseReconciler) reconcileProvisioning(ctx context.Context, hostLea
return result, nil
}

// syncHostLeaseStatus syncs power-related conditions and observed power state in memory.
func (r *HostLeaseReconciler) syncHostLeaseStatus(hostLease *v1alpha1.HostLease, node *nodes.Node, reconcileErr error, log logr.Logger) {
func (r *HostLeaseReconciler) syncHostLeaseStatus(hostLease *v1alpha1.HostLease, powerStatus *management.PowerStatus, reconcileErr error, log logr.Logger) {
if reconcileErr != nil {
hostLease.Status.Phase = v1alpha1.HostLeasePhaseFailed
hostLease.SetStatusCondition(
Expand All @@ -407,13 +413,23 @@ func (r *HostLeaseReconciler) syncHostLeaseStatus(hostLease *v1alpha1.HostLease,
return
}

if node == nil {
if powerStatus == nil {
return
}

poweredOn := node.PowerState == ironic.PowerOn.String()
poweredOn := powerStatus.State == management.PowerOn
hostLease.Status.PoweredOn = &poweredOn

if powerStatus.IsTransitioning {
hostLease.SetStatusCondition(
v1alpha1.HostConditionPowerSynced,
metav1.ConditionFalse,
v1alpha1.HostConditionReasonProgressing,
"node power state is transitioning",
)
return
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if hostLease.Spec.PoweredOn != nil && *hostLease.Spec.PoweredOn != poweredOn {
hostLease.SetStatusCondition(
v1alpha1.HostConditionPowerSynced,
Expand Down
Loading
Loading