Skip to content

Commit a3cdd1f

Browse files
Allow deploy Admin VMs and VRs in disabled zones/pods/clusters (#3600)
1 parent 6531ee5 commit a3cdd1f

8 files changed

Lines changed: 372 additions & 99 deletions

File tree

engine/components-api/src/main/java/com/cloud/deploy/DeploymentPlanningManager.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,17 @@
2222
import com.cloud.exception.InsufficientServerCapacityException;
2323
import com.cloud.utils.component.Manager;
2424
import com.cloud.vm.VirtualMachineProfile;
25+
import org.apache.cloudstack.framework.config.ConfigKey;
2526

2627
public interface DeploymentPlanningManager extends Manager {
2728

29+
30+
static final ConfigKey<Boolean> allowRouterOnDisabledResource = new ConfigKey<Boolean>("Advanced", Boolean.class, "allow.router.on.disabled.resources", "false",
31+
"Allow deploying VR in disabled Zones, Pods, and Clusters", true);
32+
33+
static final ConfigKey<Boolean> allowAdminVmOnDisabledResource = new ConfigKey<Boolean>("Advanced", Boolean.class, "allow.admin.vm.on.disabled.resources", "false",
34+
"Allow deploying VMs owned by the admin account in disabled Clusters, Pods, and Zones", true);
35+
2836
/**
2937
* Manages vm deployment stages: First Process Affinity/Anti-affinity - Call
3038
* the chain of AffinityGroupProcessor adapters to set deploymentplan scope

engine/schema/src/main/java/com/cloud/host/dao/HostDao.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,21 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
8383

8484
List<HostVO> findByClusterId(Long clusterId);
8585

86+
/**
87+
* Returns the Routing hosts that are 'Up' and 'Enabled' in the given Data Center/Zone.
88+
*/
8689
List<HostVO> listByDataCenterId(long id);
8790

91+
/**
92+
* Returns the Routing hosts that are 'Up', belong to the given Data Center/Zone, and are in the given resource state (e.g. Creating, Enabled, Disabled, etc).
93+
*/
94+
List<HostVO> listByDataCenterIdAndState(long id, ResourceState state);
95+
96+
/**
97+
* Returns the Routing hosts that are 'Up' and 'Disabled' in the given Data Center/Zone.
98+
*/
99+
List<HostVO> listDisabledByDataCenterId(long id);
100+
88101
List<HostVO> listByDataCenterIdAndHypervisorType(long zoneId, Hypervisor.HypervisorType hypervisorType);
89102

90103
List<Long> listAllHosts(long zoneId);

engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -463,13 +463,27 @@ public Integer countAllByTypeInZone(long zoneId, Type type) {
463463

464464
@Override
465465
public List<HostVO> listByDataCenterId(long id) {
466+
return listByDataCenterIdAndState(id, ResourceState.Enabled);
467+
}
468+
469+
@Override
470+
public List<HostVO> listByDataCenterIdAndState(long id, ResourceState state) {
471+
SearchCriteria<HostVO> sc = scHostsFromZoneUpRouting(id);
472+
sc.setParameters("resourceState", state);
473+
return listBy(sc);
474+
}
475+
476+
@Override
477+
public List<HostVO> listDisabledByDataCenterId(long id) {
478+
return listByDataCenterIdAndState(id, ResourceState.Disabled);
479+
}
480+
481+
private SearchCriteria<HostVO> scHostsFromZoneUpRouting(long id) {
466482
SearchCriteria<HostVO> sc = DcSearch.create();
467483
sc.setParameters("dc", id);
468484
sc.setParameters("status", Status.Up);
469485
sc.setParameters("type", Host.Type.Routing);
470-
sc.setParameters("resourceState", ResourceState.Enabled);
471-
472-
return listBy(sc);
486+
return sc;
473487
}
474488

475489
@Override

engine/schema/src/main/java/com/cloud/vm/VMInstanceVO.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ public VMInstanceVO(long id, long serviceOfferingId, String name, String instanc
238238
this.diskOfferingId = diskOfferingId;
239239
}
240240

241-
protected VMInstanceVO() {
241+
public VMInstanceVO() {
242242
}
243243

244244
public Date getRemoved() {

server/src/main/java/com/cloud/agent/manager/allocator/impl/RecreateHostAllocator.java

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
import com.cloud.host.Host;
4646
import com.cloud.host.Host.Type;
4747
import com.cloud.host.dao.HostDao;
48-
import com.cloud.org.Grouping;
4948
import com.cloud.resource.ResourceManager;
5049
import com.cloud.storage.VolumeVO;
5150
import com.cloud.storage.dao.VolumeDao;
@@ -122,21 +121,7 @@ public List<Host> allocateTo(VirtualMachineProfile vm, DeploymentPlan plan, Type
122121
}
123122

124123
for (PodCluster p : pcs) {
125-
if (p.getPod().getAllocationState() != Grouping.AllocationState.Enabled) {
126-
if (s_logger.isDebugEnabled()) {
127-
s_logger.debug("Pod name: " + p.getPod().getName() + ", podId: " + p.getPod().getId() + " is in " + p.getPod().getAllocationState().name() +
128-
" state, skipping this and trying other pods");
129-
}
130-
continue;
131-
}
132124
Long clusterId = p.getCluster() == null ? null : p.getCluster().getId();
133-
if (p.getCluster() != null && p.getCluster().getAllocationState() != Grouping.AllocationState.Enabled) {
134-
if (s_logger.isDebugEnabled()) {
135-
s_logger.debug("Cluster name: " + p.getCluster().getName() + ", clusterId: " + clusterId + " is in " + p.getCluster().getAllocationState().name() +
136-
" state, skipping this and trying other pod-clusters");
137-
}
138-
continue;
139-
}
140125
DataCenterDeployment newPlan = new DataCenterDeployment(plan.getDataCenterId(), p.getPod().getId(), clusterId, null, null, null);
141126
hosts = super.allocateTo(vm, newPlan, type, avoid, returnUpTo);
142127
if (hosts != null && !hosts.isEmpty()) {

server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java

Lines changed: 122 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@
3131
import javax.naming.ConfigurationException;
3232

3333
import org.apache.cloudstack.affinity.AffinityGroupDomainMapVO;
34+
import com.cloud.storage.VMTemplateVO;
35+
import com.cloud.storage.dao.VMTemplateDao;
36+
import com.cloud.user.AccountVO;
37+
import com.cloud.user.dao.AccountDao;
38+
import com.cloud.utils.StringUtils;
39+
import com.cloud.exception.StorageUnavailableException;
40+
import com.cloud.utils.db.Filter;
41+
import com.cloud.utils.fsm.StateMachine2;
42+
43+
import org.apache.cloudstack.framework.config.ConfigKey;
44+
import org.apache.cloudstack.framework.config.Configurable;
45+
import org.apache.commons.collections.CollectionUtils;
46+
import org.apache.commons.collections.MapUtils;
47+
import org.apache.log4j.Logger;
3448
import org.apache.cloudstack.affinity.AffinityGroupProcessor;
3549
import org.apache.cloudstack.affinity.AffinityGroupService;
3650
import org.apache.cloudstack.affinity.AffinityGroupVMMapVO;
@@ -50,9 +64,6 @@
5064
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
5165
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
5266
import org.apache.cloudstack.utils.identity.ManagementServerNode;
53-
import org.apache.commons.collections.CollectionUtils;
54-
import org.apache.commons.collections.MapUtils;
55-
import org.apache.log4j.Logger;
5667

5768
import com.cloud.agent.AgentManager;
5869
import com.cloud.agent.Listener;
@@ -84,7 +95,6 @@
8495
import com.cloud.exception.AffinityConflictException;
8596
import com.cloud.exception.ConnectionException;
8697
import com.cloud.exception.InsufficientServerCapacityException;
87-
import com.cloud.exception.StorageUnavailableException;
8898
import com.cloud.gpu.GPU;
8999
import com.cloud.host.DetailVO;
90100
import com.cloud.host.Host;
@@ -97,7 +107,6 @@
97107
import com.cloud.org.Cluster;
98108
import com.cloud.org.Grouping;
99109
import com.cloud.resource.ResourceManager;
100-
import com.cloud.resource.ResourceState;
101110
import com.cloud.service.ServiceOfferingDetailsVO;
102111
import com.cloud.service.dao.ServiceOfferingDetailsDao;
103112
import com.cloud.storage.DiskOfferingVO;
@@ -107,31 +116,26 @@
107116
import com.cloud.storage.StorageManager;
108117
import com.cloud.storage.StoragePool;
109118
import com.cloud.storage.StoragePoolHostVO;
110-
import com.cloud.storage.VMTemplateVO;
111119
import com.cloud.storage.Volume;
112120
import com.cloud.storage.VolumeVO;
113121
import com.cloud.storage.dao.DiskOfferingDao;
114122
import com.cloud.storage.dao.GuestOSCategoryDao;
115123
import com.cloud.storage.dao.GuestOSDao;
116124
import com.cloud.storage.dao.StoragePoolHostDao;
117-
import com.cloud.storage.dao.VMTemplateDao;
118125
import com.cloud.storage.dao.VolumeDao;
119126
import com.cloud.user.AccountManager;
120127
import com.cloud.utils.DateUtil;
121128
import com.cloud.utils.NumbersUtil;
122129
import com.cloud.utils.Pair;
123-
import com.cloud.utils.StringUtils;
124130
import com.cloud.utils.component.Manager;
125131
import com.cloud.utils.component.ManagerBase;
126132
import com.cloud.utils.db.DB;
127-
import com.cloud.utils.db.Filter;
128133
import com.cloud.utils.db.SearchCriteria;
129134
import com.cloud.utils.db.Transaction;
130135
import com.cloud.utils.db.TransactionCallback;
131136
import com.cloud.utils.db.TransactionStatus;
132137
import com.cloud.utils.exception.CloudRuntimeException;
133138
import com.cloud.utils.fsm.StateListener;
134-
import com.cloud.utils.fsm.StateMachine2;
135139
import com.cloud.vm.DiskProfile;
136140
import com.cloud.vm.VMInstanceVO;
137141
import com.cloud.vm.VirtualMachine;
@@ -144,12 +148,14 @@
144148
import static com.cloud.utils.NumbersUtil.toHumanReadableSize;
145149

146150
public class DeploymentPlanningManagerImpl extends ManagerBase implements DeploymentPlanningManager, Manager, Listener,
147-
StateListener<State, VirtualMachine.Event, VirtualMachine> {
151+
StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
148152

149153
private static final Logger s_logger = Logger.getLogger(DeploymentPlanningManagerImpl.class);
150154
@Inject
151155
AgentManager _agentMgr;
152156
@Inject
157+
private AccountDao accountDao;
158+
@Inject
153159
protected UserVmDao _vmDao;
154160
@Inject
155161
protected VMInstanceDao _vmInstanceDao;
@@ -177,6 +183,7 @@ public class DeploymentPlanningManagerImpl extends ManagerBase implements Deploy
177183
@Inject
178184
private VMTemplateDao templateDao;
179185

186+
// Role id that the VM owner's role id is compared against to decide whether the owner is an admin.
private static final long ADMIN_ACCOUNT_ROLE_ID = 1L;
180187
private static final long INITIAL_RESERVATION_RELEASE_CHECKER_DELAY = 30L * 1000L; // thirty seconds expressed in milliseconds
181188
protected long _nodeId = -1;
182189

@@ -283,6 +290,8 @@ public DeployDestination planDeployment(VirtualMachineProfile vmProfile, Deploym
283290
s_logger.debug("Is ROOT volume READY (pool already allocated)?: " + (plan.getPoolId() != null ? "Yes" : "No"));
284291
}
285292

293+
avoidDisabledResources(vmProfile, dc, avoids);
294+
286295
String haVmTag = (String)vmProfile.getParameter(VirtualMachineProfile.Param.HaTag);
287296
String uefiFlag = (String)vmProfile.getParameter(VirtualMachineProfile.Param.UefiFlag);
288297

@@ -311,17 +320,8 @@ public DeployDestination planDeployment(VirtualMachineProfile vmProfile, Deploym
311320
}
312321

313322
Pod pod = _podDao.findById(host.getPodId());
314-
// check if the cluster or the pod is disabled
315-
if (pod.getAllocationState() != Grouping.AllocationState.Enabled) {
316-
s_logger.warn("The Pod containing this host is in disabled state, PodId= " + pod.getId());
317-
return null;
318-
}
319323

320324
Cluster cluster = _clusterDao.findById(host.getClusterId());
321-
if (cluster.getAllocationState() != Grouping.AllocationState.Enabled) {
322-
s_logger.warn("The Cluster containing this host is in disabled state, PodId= " + cluster.getId());
323-
return null;
324-
}
325325

326326
boolean displayStorage = getDisplayStorageFromVmProfile(vmProfile);
327327
if (vm.getHypervisorType() == HypervisorType.BareMetal) {
@@ -422,8 +422,15 @@ public DeployDestination planDeployment(VirtualMachineProfile vmProfile, Deploym
422422
s_logger.debug("The last host of this VM does not have required GPU devices available");
423423
}
424424
} else {
425-
if (host.getStatus() == Status.Up && host.getResourceState() == ResourceState.Enabled) {
426-
if (checkVmProfileAndHost(vmProfile, host)) {
425+
if (host.getStatus() == Status.Up) {
426+
boolean hostTagsMatch = true;
427+
if(offering.getHostTag() != null){
428+
_hostDao.loadHostTags(host);
429+
if (!(host.getHostTags() != null && host.getHostTags().contains(offering.getHostTag()))) {
430+
hostTagsMatch = false;
431+
}
432+
}
433+
if (hostTagsMatch) {
427434
long cluster_id = host.getClusterId();
428435
ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id,
429436
"cpuOvercommitRatio");
@@ -573,6 +580,86 @@ private boolean getDisplayStorageFromVmProfile(VirtualMachineProfile vmProfile)
573580
return vmProfile == null || vmProfile.getTemplate() == null || !vmProfile.getTemplate().isDeployAsIs();
574581
}
575582

583+
/**
584+
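* Adds the disabled resources of the given Data Center (the Data Center itself, its Pods, Clusters, and Hosts) to the exclude list (avoids).
* Nothing is added when the VM type is used by the system and 'allow.router.on.disabled.resources' is enabled,
* or when the VM owner has the admin role and 'allow.admin.vm.on.disabled.resources' is enabled.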
585+
*/
586+
public void avoidDisabledResources(VirtualMachineProfile vmProfile, DataCenter dc, ExcludeList avoids) {
587+
if (vmProfile.getType().isUsedBySystem() && isRouterDeployableInDisabledResources()) {
588+
return;
589+
}
590+
591+
VMInstanceVO vm = _vmInstanceDao.findById(vmProfile.getId());
592+
AccountVO owner = accountDao.findById(vm.getAccountId());
593+
boolean isOwnerRoleIdAdmin = false;
594+
595+
if (owner != null && owner.getRoleId() != null && owner.getRoleId() == ADMIN_ACCOUNT_ROLE_ID) {
596+
isOwnerRoleIdAdmin = true;
597+
}
598+
599+
if (isOwnerRoleIdAdmin && isAdminVmDeployableInDisabledResources()) {
600+
return;
601+
}
602+
603+
avoidDisabledDataCenters(dc, avoids);
604+
avoidDisabledPods(dc, avoids);
605+
avoidDisabledClusters(dc, avoids);
606+
avoidDisabledHosts(dc, avoids);
607+
}
608+
609+
/**
610+
* Returns the value of the ConfigKey 'allow.router.on.disabled.resources'.
611+
* Note: wrapping the lookup in this method allows tests to mock the value of the respective ConfigKey.
612+
*/
613+
protected boolean isRouterDeployableInDisabledResources() {
614+
return allowRouterOnDisabledResource.value();
615+
}
616+
617+
/**
618+
* Returns the value of the ConfigKey 'allow.admin.vm.on.disabled.resources'.
619+
* Note: wrapping the lookup in this method allows tests to mock the value of the respective ConfigKey.
620+
*/
621+
protected boolean isAdminVmDeployableInDisabledResources() {
622+
return allowAdminVmOnDisabledResource.value();
623+
}
624+
625+
/**
626+
* Adds disabled Hosts to the ExcludeList in order to avoid them at the deployment planner.
627+
*/
628+
protected void avoidDisabledHosts(DataCenter dc, ExcludeList avoids) {
629+
List<HostVO> disabledHosts = _hostDao.listDisabledByDataCenterId(dc.getId());
630+
for (HostVO host : disabledHosts) {
631+
avoids.addHost(host.getId());
632+
}
633+
}
634+
635+
/**
636+
* Adds disabled Clusters to the ExcludeList in order to avoid them at the deployment planner.
637+
*/
638+
protected void avoidDisabledClusters(DataCenter dc, ExcludeList avoids) {
639+
List<Long> pods = _podDao.listAllPods(dc.getId());
640+
for (Long podId : pods) {
641+
List<Long> disabledClusters = _clusterDao.listDisabledClusters(dc.getId(), podId);
642+
avoids.addClusterList(disabledClusters);
643+
}
644+
}
645+
646+
/**
647+
* Adds disabled Pods to the ExcludeList in order to avoid them at the deployment planner.
648+
*/
649+
protected void avoidDisabledPods(DataCenter dc, ExcludeList avoids) {
650+
List<Long> disabledPods = _podDao.listDisabledPods(dc.getId());
651+
avoids.addPodList(disabledPods);
652+
}
653+
654+
/**
655+
* Adds the given Data Center (Zone) to the ExcludeList if it is disabled, so that the deployment planner avoids it.
656+
*/
657+
protected void avoidDisabledDataCenters(DataCenter dc, ExcludeList avoids) {
658+
if (dc.getAllocationState() == Grouping.AllocationState.Disabled) {
659+
avoids.addDataCenter(dc.getId());
660+
}
661+
}
662+
576663
@Override
577664
public DeploymentPlanner getDeploymentPlannerByName(String plannerName) {
578665
if (plannerName != null) {
@@ -1092,11 +1179,6 @@ private DeployDestination checkClustersforDestination(List<Long> clusterList, Vi
10921179
for (Long clusterId : clusterList) {
10931180
ClusterVO clusterVO = _clusterDao.findById(clusterId);
10941181

1095-
if (clusterVO.getAllocationState() == Grouping.AllocationState.Disabled && !plan.isMigrationPlan()) {
1096-
s_logger.debug("Cannot deploy in disabled cluster " + clusterId + ", skipping this cluster");
1097-
avoid.addCluster(clusterVO.getId());
1098-
}
1099-
11001182
if (clusterVO.getHypervisorType() != vmProfile.getHypervisorType()) {
11011183
s_logger.debug("Cluster: " + clusterId + " has HyperVisorType that does not match the VM, skipping this cluster");
11021184
avoid.addCluster(clusterVO.getId());
@@ -1110,7 +1192,9 @@ private DeployDestination checkClustersforDestination(List<Long> clusterList, Vi
11101192
new DataCenterDeployment(plan.getDataCenterId(), clusterVO.getPodId(), clusterVO.getId(), null, plan.getPoolId(), null, plan.getReservationContext());
11111193

11121194
Pod pod = _podDao.findById(clusterVO.getPodId());
1113-
if (pod.getAllocationState() == Grouping.AllocationState.Enabled ) {
1195+
if (CollectionUtils.isNotEmpty(avoid.getPodsToAvoid()) && avoid.getPodsToAvoid().contains(pod.getId())) {
1196+
s_logger.debug("The cluster is in a disabled pod : " + pod.getId());
1197+
} else {
11141198
// find suitable hosts under this cluster, need as many hosts as we
11151199
// get.
11161200
List<Host> suitableHosts = findSuitableHosts(vmProfile, potentialPlan, avoid, HostAllocator.RETURN_UPTO_ALL);
@@ -1151,9 +1235,6 @@ private DeployDestination checkClustersforDestination(List<Long> clusterList, Vi
11511235
s_logger.debug("No suitable hosts found under this Cluster: " + clusterId);
11521236
}
11531237
}
1154-
else {
1155-
s_logger.debug("The cluster is in a disabled pod : " + pod.getId());
1156-
}
11571238

11581239
if (canAvoidCluster(clusterVO, avoid, plannerAvoidOutput, vmProfile)) {
11591240
avoid.addCluster(clusterVO.getId());
@@ -1739,4 +1820,14 @@ public boolean postStateTransitionEvent(StateMachine2.Transition<State, Event> t
17391820
}
17401821
return true;
17411822
}
1823+
1824+
@Override
1825+
public ConfigKey<?>[] getConfigKeys() {
1826+
return new ConfigKey<?>[] {allowRouterOnDisabledResource, allowAdminVmOnDisabledResource};
1827+
}
1828+
1829+
@Override
1830+
public String getConfigComponentName() {
1831+
return DeploymentPlanningManager.class.getSimpleName();
1832+
}
17421833
}

0 commit comments

Comments
 (0)