diff --git a/pkg/csi/cinder/controllerserver.go b/pkg/csi/cinder/controllerserver.go index 1a1b9e2ff6..de9a91549c 100644 --- a/pkg/csi/cinder/controllerserver.go +++ b/pkg/csi/cinder/controllerserver.go @@ -23,6 +23,7 @@ import ( "slices" "sort" "strconv" + "time" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/gophercloud/gophercloud/v2/openstack/blockstorage/v3/backups" @@ -51,6 +52,10 @@ const ( cinderCSIClusterIDKey = "cinder.csi.openstack.org/cluster" affinityKey = "cinder.csi.openstack.org/affinity" antiAffinityKey = "cinder.csi.openstack.org/anti-affinity" + + // volumeReadyPollInterval is the interval at which the volume status is + // polled when waiting for a volume to become available before attaching. + volumeReadyPollInterval = 3 * time.Second ) func (cs *controllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) { @@ -321,6 +326,18 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] get volume failed with error %v", err) } + // Wait for the volume to be available before attempting attach. + // Cinder rejects attachment requests for volumes not in "available" (or + // "in-use" for multiattach) state. Without this wait, the driver would + // immediately hit a 409 Conflict if CreateVolume has just been called and + // the volume is still being provisioned on the backend. + targetStatus := []string{openstack.VolumeAvailableStatus, openstack.VolumeInUseStatus} + err = cloud.WaitVolumeTargetStatusWithContext(ctx, volumeID, targetStatus, volumeReadyPollInterval) + if err != nil { + klog.Errorf("Failed waiting for volume %s to become available: %v", volumeID, err) + return nil, status.Errorf(codes.FailedPrecondition, "[ControllerPublishVolume] Volume %s is not available for attach: %v", volumeID, err) + } + _, err = cloud.GetInstanceByID(ctx, instanceID) if err != nil { if cpoerrors.IsNotFound(err) { diff --git a/pkg/csi/cinder/controllerserver_test.go b/pkg/csi/cinder/controllerserver_test.go index df608d1b2c..aa8c673f87 100644 --- a/pkg/csi/cinder/controllerserver_test.go +++ b/pkg/csi/cinder/controllerserver_test.go @@ -529,6 +529,7 @@ func TestDeleteVolume(t *testing.T) { func TestControllerPublishVolume(t *testing.T) { fakeCs, osmock := fakeControllerServer() + osmock.On("WaitVolumeTargetStatusWithContext", FakeVolID, mock.AnythingOfType("[]string"), mock.AnythingOfType("time.Duration")).Return(nil) osmock.On("AttachVolume", FakeNodeID, FakeVolID).Return(FakeVolID, nil) osmock.On("WaitDiskAttached", FakeNodeID, FakeVolID).Return(nil) osmock.On("GetAttachmentDiskPath", FakeNodeID, FakeVolID).Return(FakeDevicePath, nil) diff --git a/pkg/csi/cinder/openstack/openstack.go b/pkg/csi/cinder/openstack/openstack.go index 7402a61fbb..6f0d87f006 100644 --- a/pkg/csi/cinder/openstack/openstack.go +++ b/pkg/csi/cinder/openstack/openstack.go @@ -21,6 +21,7 @@ import ( "fmt" "net/http" "os" + "time" "github.com/gophercloud/gophercloud/v2" "github.com/gophercloud/gophercloud/v2/openstack" @@ -54,6 +55,7 @@ type IOpenStack interface { DetachVolume(ctx context.Context, instanceID, volumeID string) error WaitDiskDetached(ctx context.Context, instanceID string, volumeID string) error WaitVolumeTargetStatus(ctx context.Context, volumeID string, tStatus []string) error + WaitVolumeTargetStatusWithContext(ctx context.Context, volumeID string, tStatus []string, interval time.Duration) error GetAttachmentDiskPath(ctx context.Context, instanceID, volumeID string) (string, error) GetVolume(ctx context.Context, volumeID string) (*volumes.Volume, error) GetVolumesByName(ctx context.Context, name string) ([]volumes.Volume, error) diff --git a/pkg/csi/cinder/openstack/openstack_mock.go b/pkg/csi/cinder/openstack/openstack_mock.go index 8849b1bf53..d88fb960ef 100644 --- a/pkg/csi/cinder/openstack/openstack_mock.go +++ b/pkg/csi/cinder/openstack/openstack_mock.go @@ -19,6 +19,7 @@ package openstack import ( "context" "fmt" + "time" "github.com/gophercloud/gophercloud/v2/openstack/blockstorage/v3/backups" "github.com/gophercloud/gophercloud/v2/openstack/blockstorage/v3/snapshots" @@ -200,6 +201,20 @@ func (_m *OpenStackMock) WaitVolumeTargetStatus(ctx context.Context, volumeID st return r0 } +// WaitVolumeTargetStatusWithContext provides a mock function with given fields: volumeID, tStatus, interval +func (_m *OpenStackMock) WaitVolumeTargetStatusWithContext(ctx context.Context, volumeID string, tStatus []string, interval time.Duration) error { + ret := _m.Called(volumeID, tStatus, interval) + + var r0 error + if rf, ok := ret.Get(0).(func(string, []string, time.Duration) error); ok { + r0 = rf(volumeID, tStatus, interval) + } else { + r0 = ret.Error(0) + } + + return r0 +} + // WaitDiskDetached provides a mock function with given fields: instanceID, volumeID func (_m *OpenStackMock) WaitDiskDetached(ctx context.Context, instanceID string, volumeID string) error { ret := _m.Called(instanceID, volumeID) diff --git a/pkg/csi/cinder/openstack/openstack_volumes.go b/pkg/csi/cinder/openstack/openstack_volumes.go index 41499b77cf..f4f67c8b9b 100644 --- a/pkg/csi/cinder/openstack/openstack_volumes.go +++ b/pkg/csi/cinder/openstack/openstack_volumes.go @@ -298,6 +298,37 @@ func (os *OpenStack) WaitVolumeTargetStatus(ctx context.Context, volumeID string return waitErr } +// WaitVolumeTargetStatusWithContext waits for volume to be in target state, +// respecting the context deadline. This is useful when the caller has a +// context with a deadline (e.g., gRPC request context) and wants the wait +// to be bounded by that deadline rather than a fixed number of steps. +// The interval parameter controls how often the volume status is polled. +func (os *OpenStack) WaitVolumeTargetStatusWithContext(ctx context.Context, volumeID string, tStatus []string, interval time.Duration) error { + err := wait.PollUntilContextCancel(ctx, interval, true, func(ctx context.Context) (bool, error) { + vol, err := os.GetVolume(ctx, volumeID) + if err != nil { + return false, err + } + for _, t := range tStatus { + if vol.Status == t { + return true, nil + } + } + for _, eState := range volumeErrorStates { + if vol.Status == eState { + return false, fmt.Errorf("volume %s is in error state: %s", volumeID, vol.Status) + } + } + return false, nil + }) + + if err != nil && ctx.Err() != nil { + return fmt.Errorf("timeout waiting for volume %s to reach status %v: %w", volumeID, tStatus, err) + } + + return err +} + // DetachVolume detaches given cinder volume from the compute func (os *OpenStack) DetachVolume(ctx context.Context, instanceID, volumeID string) error { volume, err := os.GetVolume(ctx, volumeID)