Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions plugins/module_utils/nbdkit/nbdkit.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ type NbdkitConfig struct {
VmName string
Compression string
UUID string
Port string
Timeout int
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I said in the migrate.go module, I don't think we should expose the nbdkit timeout or the retry delay to the user. They could be configurable at the package level, but exposing those settings is risky for several reasons (we know the current retry limits are sane even in a stressed environment).

RetryDelay int
UseSocks bool
VddkConfig *vmware.VddkConfig
}
Expand Down Expand Up @@ -73,7 +76,7 @@ func (c *NbdkitConfig) RunNbdKitFromLocal(diskName, diskPath string) (*NbdkitSer
logger.Log.Infof("nbdkit started...")
logger.Log.Infof("Command: %v", cmd)
time.Sleep(100 * time.Millisecond)
err := WaitForNbdkit(socket, 30*time.Second)
err := WaitForNbdkit(socket, c.Timeout, c.RetryDelay)
if err != nil {
logger.Log.Infof("Failed to wait for nbdkit: %v", err)
if cmd.Process != nil {
Expand Down Expand Up @@ -134,7 +137,7 @@ func (c *NbdkitConfig) RunNbdKitURI(diskName string) (*NbdkitServer, error) {
logger.Log.Infof("Command: %v", cmd)

time.Sleep(100 * time.Millisecond)
err = WaitForNbdkitURI("localhost", "10809", 30*time.Second)
err = WaitForNbdkitURI("localhost", c.Port, c.Timeout, c.RetryDelay)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you make the port a variable, you also have to pass it to nbdkit itself; otherwise this is just waiting on the wrong port.

if err != nil {
logger.Log.Infof("Failed to wait for nbdkit: %v", err)
if cmd.Process != nil {
Expand Down Expand Up @@ -186,7 +189,7 @@ func (c *NbdkitConfig) RunNbdKitSocks(diskName string) (*NbdkitServer, error) {
logger.Log.Infof("Command: %v", cmd)

time.Sleep(100 * time.Millisecond)
err = WaitForNbdkit(socket, 30*time.Second)
err = WaitForNbdkit(socket, c.Timeout, c.RetryDelay)
if err != nil {
logger.Log.Infof("Failed to wait for nbdkit: %v", err)
if cmd.Process != nil {
Expand Down Expand Up @@ -238,9 +241,9 @@ func removeSocket(socketPath string) error {
return nil
}

func WaitForNbdkitURI(host string, port string, timeout time.Duration) error {
func WaitForNbdkitURI(host string, port string, timeoutSeconds int, retryDelaySeconds int) error {
address := net.JoinHostPort(host, port)
deadline := time.Now().Add(timeout)
deadline := time.Now().Add(time.Duration(timeoutSeconds) * time.Second)

for time.Now().Before(deadline) {
conn, err := net.DialTimeout("tcp", address, 2*time.Second)
Expand All @@ -252,26 +255,26 @@ func WaitForNbdkitURI(host string, port string, timeout time.Duration) error {
return nil
}
logger.Log.Infof("Waiting for nbdkit to be ready...")
time.Sleep(2 * time.Second)
time.Sleep(time.Duration(retryDelaySeconds) * time.Second)
}
return fmt.Errorf("timed out waiting for nbdkit to be ready")
}

func WaitForNbdkit(socket string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
func WaitForNbdkit(socket string, timeoutSeconds int, retryDelaySeconds int) error {
deadline := time.Now().Add(time.Duration(timeoutSeconds) * time.Second)

for time.Now().Before(deadline) {
if _, err := os.Stat(socket); err == nil {
logger.Log.Infof("nbdkit is ready.")
return nil
}
logger.Log.Infof("Waiting for nbdkit to be ready...")
time.Sleep(2 * time.Second)
time.Sleep(time.Duration(retryDelaySeconds) * time.Second)
}
return fmt.Errorf("timed out waiting for nbdkit to be ready")
}

func NbdCopy(socket, device string, assumeZero bool) error {
func NbdCopy(socket, device, port string, assumeZero bool) error {
var nbdcopy string
var zeroArg string
if assumeZero {
Expand All @@ -280,7 +283,7 @@ func NbdCopy(socket, device string, assumeZero bool) error {
zeroArg = " "
}
if socket == "" {
nbdcopy = fmt.Sprintf("/usr/bin/nbdcopy nbd://localhost %s%s--progress", device, zeroArg)
nbdcopy = fmt.Sprintf("/usr/bin/nbdcopy nbd://localhost:%s %s%s--progress", port, device, zeroArg)
} else {
nbdcopy = fmt.Sprintf("/usr/bin/nbdcopy %s %s%s--progress", socket, device, zeroArg)
}
Expand Down
9 changes: 5 additions & 4 deletions plugins/module_utils/openstack/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ func CreatePort(provider *gophercloud.ProviderClient, portName, networkID, macAd
}

// WaitForPortStatus waits for a port to reach a specific status
func WaitForPortStatus(client *gophercloud.ServiceClient, portID, status string, timeout int) error {
for i := 0; i < timeout; i++ {
func WaitForPortStatus(client *gophercloud.ServiceClient, portID, status string, timeoutSeconds int) error {
timeoutIterations := timeoutSeconds / 5
for i := 0; i < timeoutIterations; i++ {
port, err := ports.Get(context.TODO(), client, portID).Extract()
if err != nil {
// If port is not found, it might be deleted (which is what we want)
Expand All @@ -121,7 +122,7 @@ func WaitForPortStatus(client *gophercloud.ServiceClient, portID, status string,
}

// DeletePort deletes a network port by ID
func DeletePort(provider *gophercloud.ProviderClient, portID string) error {
func DeletePort(provider *gophercloud.ProviderClient, portID string, timeoutSeconds int) error {
client, err := openstack.NewNetworkV2(provider, gophercloud.EndpointOpts{
Region: os.Getenv("OS_REGION_NAME"),
})
Expand All @@ -139,7 +140,7 @@ func DeletePort(provider *gophercloud.ProviderClient, portID string) error {

// Wait for port to be fully deleted to avoid MAC address conflicts
logger.Log.Infof("Waiting for port %s to be fully deleted...", portID)
err = WaitForPortStatus(client, portID, "deleted", 12) // 60 seconds timeout
err = WaitForPortStatus(client, portID, "deleted", timeoutSeconds)
if err != nil {
logger.Log.Infof("Port %s did not reach deleted status within timeout: %v", portID, err)
// Don't return error here as the port deletion might have succeeded
Expand Down
34 changes: 18 additions & 16 deletions plugins/module_utils/openstack/openstack.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func OpenstackAuth(ctx context.Context, moduleOpts DstCloud) (*gophercloud.Provi
return provider, nil
}

func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bool) (*volumes.Volume, error) {
func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bool, timeoutSeconds int) (*volumes.Volume, error) {
client, err := openstack.NewBlockStorageV3(provider, gophercloud.EndpointOpts{
Region: os.Getenv("OS_REGION_NAME"),
})
Expand All @@ -133,7 +133,7 @@ func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bo
return nil, err
}

err = WaitForVolumeStatus(client, volume.ID, "available", 3000)
err = WaitForVolumeStatus(client, volume.ID, "available", timeoutSeconds)
if err != nil {
logger.Log.Infof("Failed to wait for volume to become available: %v", err)
return nil, err
Expand Down Expand Up @@ -165,8 +165,9 @@ func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bo
return volume, nil
}

func WaitForVolumeStatus(client *gophercloud.ServiceClient, volumeID, status string, timeout int) error {
for i := 0; i < timeout; i++ {
func WaitForVolumeStatus(client *gophercloud.ServiceClient, volumeID, status string, timeoutSeconds int) error {
timeoutIterations := timeoutSeconds / 5
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just use the timeout as it was?
This seems confusing to me: if the user sets a timeout (600, for example) and that value is then divided by 5, the effective timeout won't be what they set.

for i := 0; i < timeoutIterations; i++ {
volume, err := volumes.Get(context.TODO(), client, volumeID).Extract()
if err != nil {
logger.Log.Infof("Failed to get volume status: %v", err)
Expand All @@ -181,8 +182,9 @@ func WaitForVolumeStatus(client *gophercloud.ServiceClient, volumeID, status str
return fmt.Errorf("volume %s did not reach status %s within the timeout", volumeID, status)
}

func WaitForServerStatus(client *gophercloud.ServiceClient, serverID, status string, timeout int) error {
for i := 0; i < timeout; i++ {
func WaitForServerStatus(client *gophercloud.ServiceClient, serverID, status string, timeoutSeconds int) error {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here too ?

timeoutIterations := timeoutSeconds / 5
for i := 0; i < timeoutIterations; i++ {
server, err := servers.Get(context.TODO(), client, serverID).Extract()
if err != nil {
logger.Log.Infof("Failed to get server status: %v", err)
Expand Down Expand Up @@ -331,7 +333,7 @@ func GetInstanceUUID() (string, error) {
return metaData.UUID, nil
}

func AttachVolume(client *gophercloud.ProviderClient, volumeID string, instanceName string, instanceUUID string) error {
func AttachVolume(client *gophercloud.ProviderClient, volumeID string, instanceName string, instanceUUID string, timeoutSeconds int) error {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think just naming "timeout" is fine.

computeClient, err := openstack.NewComputeV2(client, gophercloud.EndpointOpts{})
logger.Log.Infof("Volume ID: %s", volumeID)
if err != nil {
Expand Down Expand Up @@ -373,15 +375,15 @@ func AttachVolume(client *gophercloud.ProviderClient, volumeID string, instanceN
logger.Log.Infof("Failed to create block storage client: %v", err)
return err
}
err = WaitForVolumeStatus(volumeClient, volumeID, "in-use", 3000)
err = WaitForVolumeStatus(volumeClient, volumeID, "in-use", timeoutSeconds)
if err != nil {
logger.Log.Infof("Failed to wait for volume to become in-use: %v", err)
return err
}
return nil
}

func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, instanceUUID string, cloudOpts DstCloud) error {
func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, instanceUUID string, cloudOpts DstCloud, timeoutSeconds int) error {
computeClient, err := openstack.NewComputeV2(client, gophercloud.EndpointOpts{})
if err != nil {
logger.Log.Infof("Failed to create compute client: %v", err)
Expand Down Expand Up @@ -434,7 +436,7 @@ func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, in
logger.Log.Infof("Failed to create block storage client: %v", err)
return err
}
err = WaitForVolumeStatus(volumeClient, volumeID, "available", 3000)
err = WaitForVolumeStatus(volumeClient, volumeID, "available", timeoutSeconds)
if err != nil {
logger.Log.Infof("Failed to wait for volume to become available: %v", err)
return err
Expand All @@ -443,7 +445,7 @@ func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, in
return nil
}

func CreateServer(provider *gophercloud.ProviderClient, args ServerArgs) (string, error) {
func CreateServer(provider *gophercloud.ProviderClient, args ServerArgs, timeoutSeconds int) (string, error) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here too: s/timeoutSeconds/timeout/

client, err := openstack.NewComputeV2(provider, gophercloud.EndpointOpts{
Region: os.Getenv("OS_REGION_NAME"),
})
Expand Down Expand Up @@ -499,7 +501,7 @@ func CreateServer(provider *gophercloud.ProviderClient, args ServerArgs) (string
if err != nil {
return "", fmt.Errorf("failed to create server: %v", err)
}
err = WaitForServerStatus(client, server.ID, "ACTIVE", 3000)
err = WaitForServerStatus(client, server.ID, "ACTIVE", timeoutSeconds)
if err != nil {
return "", err
}
Expand Down Expand Up @@ -566,7 +568,7 @@ func DeleteServer(provider *gophercloud.ProviderClient, serverID string) error {
}

// DeleteVolume deletes a volume by ID
func DeleteVolume(provider *gophercloud.ProviderClient, volumeID string) error {
func DeleteVolume(provider *gophercloud.ProviderClient, volumeID string, timeoutSeconds int) error {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here too: s/timeoutSeconds/timeout/

client, err := openstack.NewBlockStorageV3(provider, gophercloud.EndpointOpts{
Region: os.Getenv("OS_REGION_NAME"),
})
Expand Down Expand Up @@ -594,12 +596,12 @@ func DeleteVolume(provider *gophercloud.ProviderClient, volumeID string) error {
} else {
logger.Log.Infof("Volume %s is in status '%s', waiting for it to become available or error...", volumeID, volume.Status)

// Wait for volume to become available (timeout: 60 seconds = 12 attempts * 5 seconds)
err = WaitForVolumeStatus(client, volumeID, "available", 12)
// Wait for volume to become available
err = WaitForVolumeStatus(client, volumeID, "available", timeoutSeconds)
if err != nil {
// If it doesn't become available, try to wait for error status
logger.Log.Infof("Volume did not become available, trying to wait for error status...")
err = WaitForVolumeStatus(client, volumeID, "error", 12)
err = WaitForVolumeStatus(client, volumeID, "error", timeoutSeconds)
if err != nil {
logger.Log.Infof("Volume did not reach a deletable state within timeout: %v", err)
return fmt.Errorf("volume %s did not reach a deletable state within timeout", volumeID)
Expand Down
67 changes: 45 additions & 22 deletions plugins/modules/src/migrate/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
moduleutils "vmware-migration-kit/plugins/module_utils"
Expand Down Expand Up @@ -87,20 +88,25 @@ type MigrationConfig struct {
ManageExtVol bool
BootScript string
ExtraOpts string
TimeoutSeconds int
CinderManageConfig *osm_os.CinderManageConfig
}

// Ansible
type ModuleArgs struct {
DstCloud osm_os.DstCloud `json:"dst_cloud"`
User string
Password string
Server string
Libdir string
VmName string
VolumeAz string
VolumeType string
AssumeZero bool
DstCloud osm_os.DstCloud `json:"dst_cloud"`
User string
Password string
Server string
Libdir string
Port string
Copy link
Copy Markdown
Contributor

@matbu matbu Dec 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think all of these values should be configurable via the modules.

The port can be useful in some specific cases, but we specified in the network configuration (doc) which ports need to be open, and since we have total control of the conversion host — it is a host created by us and dedicated to the migration — the nbdkit port can remain what we decided. If we do want to make it a variable, it should be renamed NbdkitPort (here, Port can be confused with the server port).

TimeoutSeconds int
NbdkitTimeout int
NbdkitRetryDelay int
VmName string
VolumeAz string
VolumeType string
AssumeZero bool
VddkPath string
OSMDataDir string
CBTSync bool
Expand Down Expand Up @@ -245,7 +251,7 @@ func (c *MigrationConfig) VMMigration(parentCtx context.Context, runV2V bool) (s
} else {
logger.Log.Infof("Creating new volume..")
// Create volume
volume, err = osm_os.CreateVolume(c.OSClient, volOpts, uefi)
volume, err = osm_os.CreateVolume(c.OSClient, volOpts, uefi, c.TimeoutSeconds)
if err != nil {
logger.Log.Infof("Failed to create volume: %v", err)
return "", err
Expand All @@ -266,14 +272,14 @@ func (c *MigrationConfig) VMMigration(parentCtx context.Context, runV2V bool) (s
return "", err
}
}
err = osm_os.AttachVolume(c.OSClient, volume.ID, c.ConvHostName, instanceUUID)
err = osm_os.AttachVolume(c.OSClient, volume.ID, c.ConvHostName, instanceUUID, c.TimeoutSeconds)
if err != nil {
logger.Log.Infof("Failed to attach volume: %v", err)
return "", err
}
// TODO: remove instanceName or handle it properly
defer func() {
if err := osm_os.DetachVolume(c.OSClient, volume.ID, "", instanceUUID, c.CloudOpts); err != nil {
if err := osm_os.DetachVolume(c.OSClient, volume.ID, "", instanceUUID, c.CloudOpts, c.TimeoutSeconds); err != nil {
logger.Log.Infof("Failed to detach volume: %v", err)
}
}()
Expand Down Expand Up @@ -338,7 +344,7 @@ func (c *MigrationConfig) VMMigration(parentCtx context.Context, runV2V bool) (s
logger.Log.Infof("No change in VM, skipping volume sync")
}
} else {
err = nbdkit.NbdCopy(sock, devPath, c.AssumeZero)
err = nbdkit.NbdCopy(sock, devPath, c.NbdkitConfig.Port, c.AssumeZero)
if err != nil {
logger.Log.Infof("Failed to copy disk: %v", err)
if err := nbdSrv.Stop(); err != nil {
Expand Down Expand Up @@ -412,6 +418,19 @@ func main() {
server := ansible.RequireField(moduleArgs.Server, "Server is required")
vmname := ansible.RequireField(moduleArgs.VmName, "VM name is required")
libdir := ansible.DefaultIfEmpty(moduleArgs.Libdir, "/usr/lib/vmware-vix-disklib")
port := ansible.DefaultIfEmpty(moduleArgs.Port, "10809")
timeoutSeconds := moduleArgs.TimeoutSeconds
if timeoutSeconds == 0 {
timeoutSeconds = 15000
}
nbdkitTimeout := moduleArgs.NbdkitTimeout
if nbdkitTimeout == 0 {
nbdkitTimeout = 30
}
nbdkitRetryDelay := moduleArgs.NbdkitRetryDelay
if nbdkitRetryDelay == 0 {
nbdkitRetryDelay = 2
}
vddkpath := ansible.DefaultIfEmpty(moduleArgs.VddkPath, "/ha-datacenter/vm/")
osmdatadir := ansible.DefaultIfEmpty(moduleArgs.OSMDataDir, "/tmp/")
convHostName := ansible.DefaultIfEmpty(moduleArgs.ConvHostName, "")
Expand Down Expand Up @@ -441,7 +460,7 @@ func main() {
ansible.FailJson(response)
}
safeVmName := moduleutils.SafeVmName(vmname)
LogFile := "/tmp/osm-nbdkit-" + safeVmName + "-" + r + ".log"
LogFile := filepath.Join(osmdatadir, "osm-nbdkit-"+safeVmName+"-"+r+".log")
logger.InitLogger(LogFile)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
Expand Down Expand Up @@ -494,6 +513,9 @@ func main() {
VmName: vmname,
Compression: compression,
UUID: r,
Port: port,
Timeout: nbdkitTimeout,
RetryDelay: nbdkitRetryDelay,
UseSocks: socks,
VddkConfig: &vmware.VddkConfig{
VirtualMachine: vm,
Expand All @@ -514,14 +536,15 @@ func main() {
Compression: compression,
RunScript: runScript,
BootScript: bootScript,
ExtraOpts: extraOpts,
InstanceUUID: instanceUUid,
Debug: debug,
LocalDiskPath: localDisk,
CloudOpts: moduleArgs.DstCloud,
VolumeType: volType,
VolumeAz: volAz,
AssumeZero: assumeZero,
ExtraOpts: extraOpts,
InstanceUUID: instanceUUid,
Debug: debug,
LocalDiskPath: localDisk,
CloudOpts: moduleArgs.DstCloud,
VolumeType: volType,
VolumeAz: volAz,
AssumeZero: assumeZero,
TimeoutSeconds: timeoutSeconds,
}
volUUID, err := VMMigration.VMMigration(ctx, runV2V)
if err != nil {
Expand Down
Loading