Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ limitations under the License.
package main

import (
"context"
"crypto/tls"
"flag"
"fmt"
"os"
"path/filepath"
"strconv"
"time"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
Expand Down Expand Up @@ -233,7 +235,9 @@ func main() {

// Ironic client for bare metal management
var ironicClient *ironic.Client
if ironicClient, err = ironic.NewClient(); err != nil {
ironicCtx, ironicCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer ironicCancel()
if ironicClient, err = ironic.NewClient(ironicCtx); err != nil {
setupLog.Error(err, "failed to create Ironic client")
os.Exit(1)
}
Expand Down
81 changes: 75 additions & 6 deletions internal/ironic/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ package ironic

import (
"context"
"errors"
"fmt"
"net/http"

"github.com/gophercloud/gophercloud/v2"
"github.com/gophercloud/gophercloud/v2/openstack/baremetal/v1/nodes"
"github.com/gophercloud/utils/v2/openstack/clientconfig"
ctrllog "sigs.k8s.io/controller-runtime/pkg/log"
)

// NodeClient is the interface for interacting with Ironic nodes.
Expand All @@ -36,7 +39,8 @@ type NodeClient interface {
// Client talks to Ironic over REST via gophercloud.
type (
Client struct {
serviceClient *gophercloud.ServiceClient
serviceClient *gophercloud.ServiceClient
newServiceClient func(ctx context.Context) (*gophercloud.ServiceClient, error)
}

TargetPowerState struct {
Expand All @@ -56,24 +60,77 @@ var (
SoftRebooting = TargetPowerState{nodes.SoftRebooting}
)

// NewClient creates an Ironic client with no-auth (typical for in-cluster Ironic with no Keystone).
func NewClient() (*Client, error) {
client, err := clientconfig.NewServiceClient(context.TODO(), "baremetal", nil)
// NewClient creates an Ironic client configured via clouds.yaml or OS_* environment variables.
func NewClient(ctx context.Context) (*Client, error) {
factory := func(ctx context.Context) (*gophercloud.ServiceClient, error) {
return clientconfig.NewServiceClient(ctx, "baremetal", nil)
}
sc, err := factory(ctx)
if err != nil {
return nil, fmt.Errorf("failed to create baremetal client: %w", err)
}
if client.Endpoint == "" {
if sc == nil {
return nil, fmt.Errorf("failed to create baremetal client: nil service client")
}
if sc.Endpoint == "" {
return nil, fmt.Errorf("baremetal client has no endpoint configured")
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
return &Client{
serviceClient: client,
serviceClient: sc,
newServiceClient: factory,
}, nil
}

func isAuthError(err error) bool {
if err == nil {
return false
}
if gophercloud.ResponseCodeIs(err, http.StatusUnauthorized) {
return true
}
var errReauth *gophercloud.ErrUnableToReauthenticate
if errors.As(err, &errReauth) {
return true
}
var errAfterReauth *gophercloud.ErrErrorAfterReauthentication
return errors.As(err, &errAfterReauth)
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

func (c *Client) reconnect(ctx context.Context) error {
Comment thread
DanNiESh marked this conversation as resolved.
log := ctrllog.FromContext(ctx)
log.Info("recreating ironic service client after authentication failure")
sc, err := c.newServiceClient(ctx)
if err != nil {
log.Error(err, "failed to recreate ironic service client")
return fmt.Errorf("failed to recreate baremetal client: %w", err)
}
if sc == nil {
return fmt.Errorf("failed to recreate baremetal client: nil service client")
}
if sc.Endpoint == "" {
return fmt.Errorf("recreated baremetal client has no endpoint configured")
}
c.serviceClient = sc
log.Info("ironic service client reconnected successfully", "endpoint", sc.Endpoint)
return nil
}

// GetNode fetches a node by UUID or name from Ironic.
func (c *Client) GetNode(ctx context.Context, nodeID string) (*nodes.Node, error) {
log := ctrllog.FromContext(ctx)
node, err := nodes.Get(ctx, c.serviceClient, nodeID).Extract()
if err != nil {
Comment thread
DanNiESh marked this conversation as resolved.
if isAuthError(err) {
log.Info("auth error on GetNode, attempting reconnect", "nodeID", nodeID, "error", err)
if reconnErr := c.reconnect(ctx); reconnErr != nil {
return nil, fmt.Errorf("get node %s: reconnect failed: %w", nodeID, reconnErr)
}
node, err = nodes.Get(ctx, c.serviceClient, nodeID).Extract()
if err != nil {
return nil, fmt.Errorf("get node %s after reconnect: %w", nodeID, err)
}
return node, nil
}
return nil, fmt.Errorf("get node %s: %w", nodeID, err)
}
return node, nil
Expand All @@ -91,8 +148,20 @@ func (c *Client) IsNodePowerTransitioning(node *nodes.Node) bool {

// SetPowerState requests power on or off for the node via Ironic.
func (c *Client) SetPowerState(ctx context.Context, nodeID string, target TargetPowerState) error {
log := ctrllog.FromContext(ctx)
res := nodes.ChangePowerState(ctx, c.serviceClient, nodeID, nodes.PowerStateOpts{Target: target.powerstate})
if err := res.ExtractErr(); err != nil {
if isAuthError(err) {
log.Info("auth error on SetPowerState, attempting reconnect", "nodeID", nodeID, "target", target.String(), "error", err)
if reconnErr := c.reconnect(ctx); reconnErr != nil {
return fmt.Errorf("failed to set power state on node %s: reconnect failed: %w", nodeID, reconnErr)
}
res = nodes.ChangePowerState(ctx, c.serviceClient, nodeID, nodes.PowerStateOpts{Target: target.powerstate})
if err := res.ExtractErr(); err != nil {
return fmt.Errorf("failed to set power state on node %s after reconnect: %w", nodeID, err)
}
return nil
}
return fmt.Errorf("failed to set power state on node %s: %w", nodeID, err)
}
return nil
Expand Down
182 changes: 182 additions & 0 deletions internal/ironic/client_internal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
Copyright 2026.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ironic

import (
"context"
"fmt"
"net/http"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"github.com/gophercloud/gophercloud/v2"
)

const (
oldEndpoint = "http://old:6385/v1/"
newEndpoint = "http://new:6385/v1/"
)

var _ = Describe("isAuthError", func() {
It("should return false for nil error", func() {
Expect(isAuthError(nil)).To(BeFalse())
})

It("should return false for generic errors", func() {
Expect(isAuthError(fmt.Errorf("some random error"))).To(BeFalse())
})

It("should return true for 401 ErrUnexpectedResponseCode", func() {
err := gophercloud.ErrUnexpectedResponseCode{
Actual: http.StatusUnauthorized,
Expected: []int{http.StatusOK},
}
Expect(isAuthError(err)).To(BeTrue())
})

It("should return false for 404 ErrUnexpectedResponseCode", func() {
err := gophercloud.ErrUnexpectedResponseCode{
Actual: http.StatusNotFound,
Expected: []int{http.StatusOK},
}
Expect(isAuthError(err)).To(BeFalse())
})

It("should return false for 500 ErrUnexpectedResponseCode", func() {
err := gophercloud.ErrUnexpectedResponseCode{
Actual: http.StatusInternalServerError,
Expected: []int{http.StatusOK},
}
Expect(isAuthError(err)).To(BeFalse())
})

It("should return true for *ErrUnableToReauthenticate (pointer, as gophercloud returns)", func() {
err := &gophercloud.ErrUnableToReauthenticate{
ErrOriginal: fmt.Errorf("original"),
ErrReauth: fmt.Errorf("reauth failed"),
}
Expect(isAuthError(err)).To(BeTrue())
})

It("should return true for *ErrErrorAfterReauthentication (pointer, as gophercloud returns)", func() {
err := &gophercloud.ErrErrorAfterReauthentication{
ErrOriginal: fmt.Errorf("still failing"),
}
Expect(isAuthError(err)).To(BeTrue())
})

It("should return true for wrapped *ErrUnableToReauthenticate", func() {
inner := &gophercloud.ErrUnableToReauthenticate{
ErrOriginal: fmt.Errorf("original"),
ErrReauth: fmt.Errorf("reauth failed"),
}
wrapped := fmt.Errorf("operation failed: %w", inner)
Expect(isAuthError(wrapped)).To(BeTrue())
})

It("should return true for wrapped *ErrErrorAfterReauthentication", func() {
inner := &gophercloud.ErrErrorAfterReauthentication{
ErrOriginal: fmt.Errorf("still failing"),
}
wrapped := fmt.Errorf("operation failed: %w", inner)
Expect(isAuthError(wrapped)).To(BeTrue())
})

It("should return true for wrapped 401 error", func() {
inner := gophercloud.ErrUnexpectedResponseCode{
Actual: http.StatusUnauthorized,
Expected: []int{http.StatusOK},
}
wrapped := fmt.Errorf("operation failed: %w", inner)
Expect(isAuthError(wrapped)).To(BeTrue())
})

It("should return false for wrapped non-auth error", func() {
inner := gophercloud.ErrUnexpectedResponseCode{
Actual: http.StatusNotFound,
Expected: []int{http.StatusOK},
}
wrapped := fmt.Errorf("operation failed: %w", inner)
Expect(isAuthError(wrapped)).To(BeFalse())
})
})

var _ = Describe("reconnect", func() {
It("should swap the service client on success", func() {
oldSC := &gophercloud.ServiceClient{Endpoint: oldEndpoint}
newSC := &gophercloud.ServiceClient{Endpoint: newEndpoint}

c := &Client{
serviceClient: oldSC,
newServiceClient: func(context.Context) (*gophercloud.ServiceClient, error) {
return newSC, nil
},
}

Expect(c.reconnect(context.Background())).To(Succeed())
Expect(c.serviceClient).To(Equal(newSC))
Expect(c.GetEndpoint()).To(Equal(newEndpoint))
})

It("should return error when factory fails", func() {
oldSC := &gophercloud.ServiceClient{Endpoint: oldEndpoint}

c := &Client{
serviceClient: oldSC,
newServiceClient: func(context.Context) (*gophercloud.ServiceClient, error) {
return nil, fmt.Errorf("keystone is down")
},
}

err := c.reconnect(context.Background())
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("keystone is down"))
Expect(c.serviceClient).To(Equal(oldSC), "should keep old client on failure")
})

It("should return error when factory returns nil client without error", func() {
oldSC := &gophercloud.ServiceClient{Endpoint: oldEndpoint}

c := &Client{
serviceClient: oldSC,
newServiceClient: func(context.Context) (*gophercloud.ServiceClient, error) {
return nil, nil
},
}

err := c.reconnect(context.Background())
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("nil service client"))
Expect(c.serviceClient).To(Equal(oldSC), "should keep old client on nil return")
})

It("should return error when new client has empty endpoint", func() {
oldSC := &gophercloud.ServiceClient{Endpoint: oldEndpoint}

c := &Client{
serviceClient: oldSC,
newServiceClient: func(context.Context) (*gophercloud.ServiceClient, error) {
return &gophercloud.ServiceClient{Endpoint: ""}, nil
},
}

err := c.reconnect(context.Background())
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("no endpoint configured"))
})
})
Comment thread
coderabbitai[bot] marked this conversation as resolved.
9 changes: 5 additions & 4 deletions internal/ironic/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package ironic_test

import (
"context"
"os"
"testing"

Expand Down Expand Up @@ -106,7 +107,7 @@ var _ = Describe("Client", func() {
})

It("should return an error when no credentials are available", func() {
client, err := ironic.NewClient()
client, err := ironic.NewClient(context.Background())
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("failed to create baremetal client"))
Expect(client).To(BeNil())
Expand All @@ -121,14 +122,14 @@ var _ = Describe("Client", func() {
})

It("should create a client successfully", func() {
client, err := ironic.NewClient()
client, err := ironic.NewClient(context.Background())
Expect(err).NotTo(HaveOccurred())
Expect(client).NotTo(BeNil())
Expect(client.GetEndpoint()).NotTo(BeEmpty())
})

It("should have a non-empty endpoint", func() {
client, err := ironic.NewClient()
client, err := ironic.NewClient(context.Background())
Expect(err).NotTo(HaveOccurred())

endpoint := client.GetEndpoint()
Expand All @@ -145,7 +146,7 @@ var _ = Describe("Client", func() {
})

It("should return a valid HTTP(S) endpoint", func() {
client, err := ironic.NewClient()
client, err := ironic.NewClient(context.Background())
Expect(err).NotTo(HaveOccurred())

endpoint := client.GetEndpoint()
Expand Down
Loading