Skip to content

Commit 61189db

Browse files
committed
CE-113 configure workervm gc based on job expiry
1 parent c689d4a commit 61189db

3 files changed

Lines changed: 19 additions & 13 deletions

File tree

framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@
8585

8686
public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, ClusterManagerListener, Configurable {
8787
// Advanced
88-
private static final ConfigKey<Long> JobExpireMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.expire.minutes", "1440",
88+
public static final ConfigKey<Long> JobExpireMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.expire.minutes", "1440",
8989
"Time (in minutes) for async-jobs to be kept in system", true, ConfigKey.Scope.Global);
90-
private static final ConfigKey<Long> JobCancelThresholdMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.cancel.threshold.minutes", "60",
90+
public static final ConfigKey<Long> JobCancelThresholdMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.cancel.threshold.minutes", "60",
9191
"Time (in minutes) for async-jobs to be forcely cancelled if it has been in process for long", true, ConfigKey.Scope.Global);
9292
private static final ConfigKey<Integer> VmJobLockTimeout = new ConfigKey<Integer>("Advanced",
9393
Integer.class, "vm.job.lock.timeout", "1800",

plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManager.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ public interface VmwareManager {
3535
public static final ConfigKey<Long> s_vmwareNicHotplugWaitTimeout = new ConfigKey<Long>("Advanced", Long.class, "vmware.nic.hotplug.wait.timeout", "15000",
3636
"Wait timeout (milli seconds) for hot plugged NIC of VM to be detected by guest OS.", false, ConfigKey.Scope.Global);
3737

38+
public static final ConfigKey<Boolean> s_vmwareCleanOldWorderVMs = new ConfigKey<Boolean>("Advanced", Boolean.class, "vmware.clean.old.worker.vms", "false",
39+
"If a worker vm is older then twice the 'job.expire.minutes' + 'job.cancel.threshold.minutes' , remove it.", true, ConfigKey.Scope.Global);
40+
3841
String composeWorkerName();
3942

4043
String getSystemVMIsoFileNameOnDatastore();

plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import javax.inject.Inject;
3636
import javax.naming.ConfigurationException;
3737

38+
import org.apache.cloudstack.framework.jobs.impl.AsyncJobManagerImpl;
3839
import org.apache.log4j.Logger;
3940

4041
import com.vmware.vim25.AboutInfo;
@@ -128,6 +129,7 @@
128129
public class VmwareManagerImpl extends ManagerBase implements VmwareManager, VmwareStorageMount, Listener, VmwareDatacenterService, Configurable {
129130
private static final Logger s_logger = Logger.getLogger(VmwareManagerImpl.class);
130131

132+
private static final long MILISECONDS_PER_MINUTE = 60000;
131133
private static final int STARTUP_DELAY = 60000; // 60 seconds
132134
private static final long DEFAULT_HOST_SCAN_INTERVAL = 600000; // every 10 minutes
133135
private long _hostScanInterval = DEFAULT_HOST_SCAN_INTERVAL;
@@ -212,7 +214,7 @@ public String getConfigComponentName() {
212214

213215
@Override
214216
public ConfigKey<?>[] getConfigKeys() {
215-
return new ConfigKey<?>[] {s_vmwareNicHotplugWaitTimeout};
217+
return new ConfigKey<?>[] {s_vmwareNicHotplugWaitTimeout, s_vmwareCleanOldWorderVMs};
216218
}
217219

218220
@Override
@@ -534,7 +536,7 @@ public boolean needRecycle(String workerTag) {
534536
return false;
535537
}
536538

537-
Long.parseLong(tokens[0]);
539+
long startTick = Long.parseLong(tokens[0]);
538540
long msid = Long.parseLong(tokens[1]);
539541
long runid = Long.parseLong(tokens[2]);
540542

@@ -550,15 +552,16 @@ public boolean needRecycle(String workerTag) {
550552
return true;
551553
}
552554

553-
// disable time-out check until we have found out a VMware API that can check if
554-
// there are pending tasks on the subject VM
555-
/*
556-
if(System.currentTimeMillis() - startTick > _hungWorkerTimeout) {
557-
if(s_logger.isInfoEnabled())
558-
s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000);
559-
return true;
560-
}
561-
*/
555+
// this time-out check was disabled
556+
// "until we have found out a VMware API that can check if there are pending tasks on the subject VM"
557+
// but as we expire jobs and those stale worker VMs stay around until an MS reboot, we opt in to have them removed anyway
558+
Long hungWorkerTimeout = 2 * (AsyncJobManagerImpl.JobExpireMinutes.value() + AsyncJobManagerImpl.JobCancelThresholdMinutes.value()) * MILISECONDS_PER_MINUTE;
559+
if(s_vmwareCleanOldWorderVMs.value() && System.currentTimeMillis() - startTick > hungWorkerTimeout) {
560+
if(s_logger.isInfoEnabled()) {
561+
s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000);
562+
}
563+
return true;
564+
}
562565
return false;
563566
}
564567

0 commit comments

Comments
 (0)