Make hardcoded timeout and configuration values configurable #180
base: main
Changes from all commits
@@ -41,6 +41,9 @@ type NbdkitConfig struct {
 	VmName      string
 	Compression string
 	UUID        string
+	Port        string
+	Timeout     int
+	RetryDelay  int
 	UseSocks    bool
 	VddkConfig  *vmware.VddkConfig
 }
@@ -73,7 +76,7 @@ func (c *NbdkitConfig) RunNbdKitFromLocal(diskName, diskPath string) (*NbdkitSer
 	logger.Log.Infof("nbdkit started...")
 	logger.Log.Infof("Command: %v", cmd)
 	time.Sleep(100 * time.Millisecond)
-	err := WaitForNbdkit(socket, 30*time.Second)
+	err := WaitForNbdkit(socket, c.Timeout, c.RetryDelay)
 	if err != nil {
 		logger.Log.Infof("Failed to wait for nbdkit: %v", err)
 		if cmd.Process != nil {
@@ -134,7 +137,7 @@ func (c *NbdkitConfig) RunNbdKitURI(diskName string) (*NbdkitServer, error) {
 	logger.Log.Infof("Command: %v", cmd)

 	time.Sleep(100 * time.Millisecond)
-	err = WaitForNbdkitURI("localhost", "10809", 30*time.Second)
+	err = WaitForNbdkitURI("localhost", c.Port, c.Timeout, c.RetryDelay)
 	if err != nil {
 		logger.Log.Infof("Failed to wait for nbdkit: %v", err)
 		if cmd.Process != nil {

Contributor: If you make the port a variable, you have to set it for nbdkit as well; otherwise it's just waiting on the wrong port.
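A minimal sketch of what this comment implies, not code from the PR: the nbdkit launch must be given the same configurable port that the wait loop probes. nbdkit's real -p/--port flag selects the listening port (default 10809); the helper name, the plugin arguments, and the assumption that the command is built with os/exec are all illustrative.

```go
// startNbdkit is a hypothetical helper; only the --port wiring is the point.
func startNbdkit(c *NbdkitConfig) (*exec.Cmd, error) {
	// Tell nbdkit to serve on the configured port instead of the default 10809.
	cmd := exec.Command("nbdkit", "--port", c.Port, "--exit-with-parent", "vddk")
	if err := cmd.Start(); err != nil {
		return nil, err
	}
	// Probe the same port the server was told to use.
	if err := WaitForNbdkitURI("localhost", c.Port, c.Timeout, c.RetryDelay); err != nil {
		return nil, err
	}
	return cmd, nil
}
```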
@@ -186,7 +189,7 @@ func (c *NbdkitConfig) RunNbdKitSocks(diskName string) (*NbdkitServer, error) {
 	logger.Log.Infof("Command: %v", cmd)

 	time.Sleep(100 * time.Millisecond)
-	err = WaitForNbdkit(socket, 30*time.Second)
+	err = WaitForNbdkit(socket, c.Timeout, c.RetryDelay)
 	if err != nil {
 		logger.Log.Infof("Failed to wait for nbdkit: %v", err)
 		if cmd.Process != nil {
@@ -238,9 +241,9 @@ func removeSocket(socketPath string) error {
 	return nil
 }

-func WaitForNbdkitURI(host string, port string, timeout time.Duration) error {
+func WaitForNbdkitURI(host string, port string, timeoutSeconds int, retryDelaySeconds int) error {
 	address := net.JoinHostPort(host, port)
-	deadline := time.Now().Add(timeout)
+	deadline := time.Now().Add(time.Duration(timeoutSeconds) * time.Second)

 	for time.Now().Before(deadline) {
 		conn, err := net.DialTimeout("tcp", address, 2*time.Second)
@@ -252,26 +255,26 @@ func WaitForNbdkitURI(host string, port string, timeout time.Duration) error {
 			return nil
 		}
 		logger.Log.Infof("Waiting for nbdkit to be ready...")
-		time.Sleep(2 * time.Second)
+		time.Sleep(time.Duration(retryDelaySeconds) * time.Second)
 	}
 	return fmt.Errorf("timed out waiting for nbdkit to be ready")
 }

-func WaitForNbdkit(socket string, timeout time.Duration) error {
-	deadline := time.Now().Add(timeout)
+func WaitForNbdkit(socket string, timeoutSeconds int, retryDelaySeconds int) error {
+	deadline := time.Now().Add(time.Duration(timeoutSeconds) * time.Second)

 	for time.Now().Before(deadline) {
 		if _, err := os.Stat(socket); err == nil {
 			logger.Log.Infof("nbdkit is ready.")
 			return nil
 		}
 		logger.Log.Infof("Waiting for nbdkit to be ready...")
-		time.Sleep(2 * time.Second)
+		time.Sleep(time.Duration(retryDelaySeconds) * time.Second)
 	}
 	return fmt.Errorf("timed out waiting for nbdkit to be ready")
 }

-func NbdCopy(socket, device string, assumeZero bool) error {
+func NbdCopy(socket, device, port string, assumeZero bool) error {
 	var nbdcopy string
 	var zeroArg string
 	if assumeZero {
@@ -280,7 +283,7 @@ func NbdCopy(socket, device string, assumeZero bool) error {
 		zeroArg = " "
 	}
 	if socket == "" {
-		nbdcopy = fmt.Sprintf("/usr/bin/nbdcopy nbd://localhost %s%s--progress", device, zeroArg)
+		nbdcopy = fmt.Sprintf("/usr/bin/nbdcopy nbd://localhost:%s %s%s--progress", port, device, zeroArg)
 	} else {
 		nbdcopy = fmt.Sprintf("/usr/bin/nbdcopy %s %s%s--progress", socket, device, zeroArg)
 	}
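For illustration, a hypothetical call of the new signature (the device and port values here are invented):

```go
// With an empty socket, the TCP branch now renders:
//   /usr/bin/nbdcopy nbd://localhost:10809 /dev/vdb --progress
// so the copy targets the same configurable port nbdkit serves on.
err := NbdCopy("", "/dev/vdb", "10809", false)
```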
(diff continues in a second file)
@@ -110,7 +110,7 @@ func OpenstackAuth(ctx context.Context, moduleOpts DstCloud) (*gophercloud.Provi
 	return provider, nil
 }

-func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bool) (*volumes.Volume, error) {
+func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bool, timeoutSeconds int) (*volumes.Volume, error) {
 	client, err := openstack.NewBlockStorageV3(provider, gophercloud.EndpointOpts{
 		Region: os.Getenv("OS_REGION_NAME"),
 	})
@@ -133,7 +133,7 @@ func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bo
 		return nil, err
 	}

-	err = WaitForVolumeStatus(client, volume.ID, "available", 3000)
+	err = WaitForVolumeStatus(client, volume.ID, "available", timeoutSeconds)
 	if err != nil {
 		logger.Log.Infof("Failed to wait for volume to become available: %v", err)
 		return nil, err
@@ -165,8 +165,9 @@ func CreateVolume(provider *gophercloud.ProviderClient, opts VolOpts, setUEFI bo
 	return volume, nil
 }

-func WaitForVolumeStatus(client *gophercloud.ServiceClient, volumeID, status string, timeout int) error {
-	for i := 0; i < timeout; i++ {
+func WaitForVolumeStatus(client *gophercloud.ServiceClient, volumeID, status string, timeoutSeconds int) error {
+	timeoutIterations := timeoutSeconds / 5
+	for i := 0; i < timeoutIterations; i++ {
 		volume, err := volumes.Get(context.TODO(), client, volumeID).Extract()
 		if err != nil {
 			logger.Log.Infof("Failed to get volume status: %v", err)

Contributor: Why not just use the timeout as it was?
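The question reflects a unit change: the old parameter was an iteration count (3000 attempts), while the PR reinterprets it as wall-clock seconds and divides by the loop's poll interval. Assuming the 5-second sleep the removed comment further down documents ("12 attempts * 5 seconds"), the arithmetic is behavior-preserving: 3000 attempts at 5 s each equal the new 15000-second default. A deadline-based loop, sketched below with invented names and not taken from this PR, would keep the unit explicit and avoid the division entirely, mirroring how WaitForNbdkit already works:

```go
// Illustrative alternative; poll and the helper name are hypothetical.
func waitFor(poll func() bool, timeoutSeconds int) error {
	deadline := time.Now().Add(time.Duration(timeoutSeconds) * time.Second)
	for time.Now().Before(deadline) {
		if poll() {
			return nil
		}
		time.Sleep(5 * time.Second) // the interval the timeoutSeconds/5 math assumes
	}
	return fmt.Errorf("timed out after %d seconds", timeoutSeconds)
}
```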
@@ -181,8 +182,9 @@ func WaitForVolumeStatus(client *gophercloud.ServiceClient, volumeID, status str
 	return fmt.Errorf("volume %s did not reach status %s within the timeout", volumeID, status)
 }

-func WaitForServerStatus(client *gophercloud.ServiceClient, serverID, status string, timeout int) error {
-	for i := 0; i < timeout; i++ {
+func WaitForServerStatus(client *gophercloud.ServiceClient, serverID, status string, timeoutSeconds int) error {
+	timeoutIterations := timeoutSeconds / 5
+	for i := 0; i < timeoutIterations; i++ {
 		server, err := servers.Get(context.TODO(), client, serverID).Extract()
 		if err != nil {
 			logger.Log.Infof("Failed to get server status: %v", err)

Contributor: Here too?
@@ -331,7 +333,7 @@ func GetInstanceUUID() (string, error) {
 	return metaData.UUID, nil
 }

-func AttachVolume(client *gophercloud.ProviderClient, volumeID string, instanceName string, instanceUUID string) error {
+func AttachVolume(client *gophercloud.ProviderClient, volumeID string, instanceName string, instanceUUID string, timeoutSeconds int) error {
 	computeClient, err := openstack.NewComputeV2(client, gophercloud.EndpointOpts{})
 	logger.Log.Infof("Volume ID: %s", volumeID)
 	if err != nil {

Contributor: I think just naming "timeout" is fine.
@@ -373,15 +375,15 @@ func AttachVolume(client *gophercloud.ProviderClient, volumeID string, instanceN
 		logger.Log.Infof("Failed to create block storage client: %v", err)
 		return err
 	}
-	err = WaitForVolumeStatus(volumeClient, volumeID, "in-use", 3000)
+	err = WaitForVolumeStatus(volumeClient, volumeID, "in-use", timeoutSeconds)
 	if err != nil {
 		logger.Log.Infof("Failed to wait for volume to become in-use: %v", err)
 		return err
 	}
 	return nil
 }

-func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, instanceUUID string, cloudOpts DstCloud) error {
+func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, instanceUUID string, cloudOpts DstCloud, timeoutSeconds int) error {
 	computeClient, err := openstack.NewComputeV2(client, gophercloud.EndpointOpts{})
 	if err != nil {
 		logger.Log.Infof("Failed to create compute client: %v", err)
@@ -434,7 +436,7 @@ func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, in
 		logger.Log.Infof("Failed to create block storage client: %v", err)
 		return err
 	}
-	err = WaitForVolumeStatus(volumeClient, volumeID, "available", 3000)
+	err = WaitForVolumeStatus(volumeClient, volumeID, "available", timeoutSeconds)
 	if err != nil {
 		logger.Log.Infof("Failed to wait for volume to become available: %v", err)
 		return err
@@ -443,7 +445,7 @@ func DetachVolume(client *gophercloud.ProviderClient, volumeID, instanceName, in
 	return nil
 }

-func CreateServer(provider *gophercloud.ProviderClient, args ServerArgs) (string, error) {
+func CreateServer(provider *gophercloud.ProviderClient, args ServerArgs, timeoutSeconds int) (string, error) {
 	client, err := openstack.NewComputeV2(provider, gophercloud.EndpointOpts{
 		Region: os.Getenv("OS_REGION_NAME"),
 	})

Contributor: here too: s/timeoutSeconds/timeout/
@@ -499,7 +501,7 @@ func CreateServer(provider *gophercloud.ProviderClient, args ServerArgs) (string
 	if err != nil {
 		return "", fmt.Errorf("failed to create server: %v", err)
 	}
-	err = WaitForServerStatus(client, server.ID, "ACTIVE", 3000)
+	err = WaitForServerStatus(client, server.ID, "ACTIVE", timeoutSeconds)
 	if err != nil {
 		return "", err
 	}
@@ -566,7 +568,7 @@ func DeleteServer(provider *gophercloud.ProviderClient, serverID string) error {
 }

 // DeleteVolume deletes a volume by ID
-func DeleteVolume(provider *gophercloud.ProviderClient, volumeID string) error {
+func DeleteVolume(provider *gophercloud.ProviderClient, volumeID string, timeoutSeconds int) error {
 	client, err := openstack.NewBlockStorageV3(provider, gophercloud.EndpointOpts{
 		Region: os.Getenv("OS_REGION_NAME"),
 	})

Contributor: here too: s/timeoutSeconds/timeout/
@@ -594,12 +596,12 @@ func DeleteVolume(provider *gophercloud.ProviderClient, volumeID string) error {
 	} else {
 		logger.Log.Infof("Volume %s is in status '%s', waiting for it to become available or error...", volumeID, volume.Status)

-		// Wait for volume to become available (timeout: 60 seconds = 12 attempts * 5 seconds)
-		err = WaitForVolumeStatus(client, volumeID, "available", 12)
+		// Wait for volume to become available
+		err = WaitForVolumeStatus(client, volumeID, "available", timeoutSeconds)
 		if err != nil {
 			// If it doesn't become available, try to wait for error status
 			logger.Log.Infof("Volume did not become available, trying to wait for error status...")
-			err = WaitForVolumeStatus(client, volumeID, "error", 12)
+			err = WaitForVolumeStatus(client, volumeID, "error", timeoutSeconds)
 			if err != nil {
 				logger.Log.Infof("Volume did not reach a deletable state within timeout: %v", err)
 				return fmt.Errorf("volume %s did not reach a deletable state within timeout", volumeID)
(diff continues in a third file)
@@ -22,6 +22,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"path/filepath"
 	"strconv"
 	"strings"
 	moduleutils "vmware-migration-kit/plugins/module_utils"
@@ -87,20 +88,25 @@ type MigrationConfig struct {
 	ManageExtVol bool
 	BootScript   string
 	ExtraOpts    string
+	TimeoutSeconds int
 	CinderManageConfig *osm_os.CinderManageConfig
 }

 // Ansible
 type ModuleArgs struct {
-	DstCloud   osm_os.DstCloud `json:"dst_cloud"`
-	User       string
-	Password   string
-	Server     string
-	Libdir     string
-	VmName     string
-	VolumeAz   string
-	VolumeType string
-	AssumeZero bool
+	DstCloud         osm_os.DstCloud `json:"dst_cloud"`
+	User             string
+	Password         string
+	Server           string
+	Libdir           string
+	Port             string
+	TimeoutSeconds   int
+	NbdkitTimeout    int
+	NbdkitRetryDelay int
+	VmName           string
+	VolumeAz         string
+	VolumeType       string
+	AssumeZero       bool
 	VddkPath   string
 	OSMDataDir string
 	CBTSync    bool

Contributor: I don't think all these values should be configurable via the modules. The port can be useful in some specific cases, but we specified in the network configuration (doc) which ports need to be open, and since we have total control of the conversion host (it's a host created by us and dedicated to the migration), the nbdkit port can remain what we decided. But if we do want to make it a variable, it should be renamed NbdkitPort, because here Port can be confused with the server port.
@@ -245,7 +251,7 @@ func (c *MigrationConfig) VMMigration(parentCtx context.Context, runV2V bool) (s
 	} else {
 		logger.Log.Infof("Creating new volume..")
 		// Create volume
-		volume, err = osm_os.CreateVolume(c.OSClient, volOpts, uefi)
+		volume, err = osm_os.CreateVolume(c.OSClient, volOpts, uefi, c.TimeoutSeconds)
 		if err != nil {
 			logger.Log.Infof("Failed to create volume: %v", err)
 			return "", err
@@ -266,14 +272,14 @@ func (c *MigrationConfig) VMMigration(parentCtx context.Context, runV2V bool) (s
 			return "", err
 		}
 	}
-	err = osm_os.AttachVolume(c.OSClient, volume.ID, c.ConvHostName, instanceUUID)
+	err = osm_os.AttachVolume(c.OSClient, volume.ID, c.ConvHostName, instanceUUID, c.TimeoutSeconds)
 	if err != nil {
 		logger.Log.Infof("Failed to attach volume: %v", err)
 		return "", err
 	}
 	// TODO: remove instanceName or handle it properly
 	defer func() {
-		if err := osm_os.DetachVolume(c.OSClient, volume.ID, "", instanceUUID, c.CloudOpts); err != nil {
+		if err := osm_os.DetachVolume(c.OSClient, volume.ID, "", instanceUUID, c.CloudOpts, c.TimeoutSeconds); err != nil {
 			logger.Log.Infof("Failed to detach volume: %v", err)
 		}
 	}()
@@ -338,7 +344,7 @@ func (c *MigrationConfig) VMMigration(parentCtx context.Context, runV2V bool) (s
 			logger.Log.Infof("No change in VM, skipping volume sync")
 		}
 	} else {
-		err = nbdkit.NbdCopy(sock, devPath, c.AssumeZero)
+		err = nbdkit.NbdCopy(sock, devPath, c.NbdkitConfig.Port, c.AssumeZero)
 		if err != nil {
 			logger.Log.Infof("Failed to copy disk: %v", err)
 			if err := nbdSrv.Stop(); err != nil {
@@ -412,6 +418,19 @@ func main() {
 	server := ansible.RequireField(moduleArgs.Server, "Server is required")
 	vmname := ansible.RequireField(moduleArgs.VmName, "VM name is required")
 	libdir := ansible.DefaultIfEmpty(moduleArgs.Libdir, "/usr/lib/vmware-vix-disklib")
+	port := ansible.DefaultIfEmpty(moduleArgs.Port, "10809")
+	timeoutSeconds := moduleArgs.TimeoutSeconds
+	if timeoutSeconds == 0 {
+		timeoutSeconds = 15000
+	}
+	nbdkitTimeout := moduleArgs.NbdkitTimeout
+	if nbdkitTimeout == 0 {
+		nbdkitTimeout = 30
+	}
+	nbdkitRetryDelay := moduleArgs.NbdkitRetryDelay
+	if nbdkitRetryDelay == 0 {
+		nbdkitRetryDelay = 2
+	}
 	vddkpath := ansible.DefaultIfEmpty(moduleArgs.VddkPath, "/ha-datacenter/vm/")
 	osmdatadir := ansible.DefaultIfEmpty(moduleArgs.OSMDataDir, "/tmp/")
 	convHostName := ansible.DefaultIfEmpty(moduleArgs.ConvHostName, "")
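The three zero-value checks repeat one pattern; a helper mirroring the existing ansible.DefaultIfEmpty would compress them. DefaultIfZero is hypothetical here (nothing in this diff shows it exists):

```go
// Hypothetical helper in the ansible package, by analogy with DefaultIfEmpty.
func DefaultIfZero(value, fallback int) int {
	if value == 0 {
		return fallback
	}
	return value
}

// Usage (illustrative):
// timeoutSeconds := ansible.DefaultIfZero(moduleArgs.TimeoutSeconds, 15000)
// nbdkitTimeout := ansible.DefaultIfZero(moduleArgs.NbdkitTimeout, 30)
// nbdkitRetryDelay := ansible.DefaultIfZero(moduleArgs.NbdkitRetryDelay, 2)
```

Either way, one caveat of this defaulting scheme: a user cannot explicitly request 0, since 0 is treated as unset.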
@@ -441,7 +460,7 @@ func main() {
 		ansible.FailJson(response)
 	}
 	safeVmName := moduleutils.SafeVmName(vmname)
-	LogFile := "/tmp/osm-nbdkit-" + safeVmName + "-" + r + ".log"
+	LogFile := filepath.Join(osmdatadir, "osm-nbdkit-"+safeVmName+"-"+r+".log")
 	logger.InitLogger(LogFile)
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
@@ -494,6 +513,9 @@ func main() {
 		VmName:      vmname,
 		Compression: compression,
 		UUID:        r,
+		Port:        port,
+		Timeout:     nbdkitTimeout,
+		RetryDelay:  nbdkitRetryDelay,
 		UseSocks:    socks,
 		VddkConfig: &vmware.VddkConfig{
 			VirtualMachine: vm,
@@ -514,14 +536,15 @@ func main() {
 		Compression: compression,
 		RunScript:   runScript,
 		BootScript:  bootScript,
-		ExtraOpts:     extraOpts,
-		InstanceUUID:  instanceUUid,
-		Debug:         debug,
-		LocalDiskPath: localDisk,
-		CloudOpts:     moduleArgs.DstCloud,
-		VolumeType:    volType,
-		VolumeAz:      volAz,
-		AssumeZero:    assumeZero,
+		ExtraOpts:      extraOpts,
+		InstanceUUID:   instanceUUid,
+		Debug:          debug,
+		LocalDiskPath:  localDisk,
+		CloudOpts:      moduleArgs.DstCloud,
+		VolumeType:     volType,
+		VolumeAz:       volAz,
+		AssumeZero:     assumeZero,
+		TimeoutSeconds: timeoutSeconds,
 	}
 	volUUID, err := VMMigration.VMMigration(ctx, runV2V)
 	if err != nil {
Here, as I said in the module migrate.go, I don't think we should expose the nbdkit timeout and retry delay to the user. They could be configurable at the package level, but I think it's risky to expose those settings, for several reasons (we know those retry limits are sane even in a stressed environment).
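A sketch of the package-level alternative this comment describes; the names are invented and the values simply mirror the module defaults above, not any existing code:

```go
// In the nbdkit package: fixed, reviewed defaults instead of module arguments.
const (
	defaultNbdkitTimeoutSeconds    = 30 // overall wait budget for readiness
	defaultNbdkitRetryDelaySeconds = 2  // pause between readiness probes
)
```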