Skip to content

Commit a4d9825

Browse files
author
kunal.behbudzade
committed
KVM: tighten UEFI disk-only snapshot handling
Keep stopped UEFI snapshot creation tied to the last host so the agent copies the active host-local NVRAM file, while still allowing capable-host selection for revert and NVRAM sidecar cleanup. Also handle QEMU guest-agent error payloads consistently and add the API/UI warning for the brief suspend used when snapshotting running UEFI guests. Signed-off-by: kunal.behbudzade <kunal.behbudzade@btsgrp.com>
1 parent 75bf215 commit a4d9825

8 files changed

Lines changed: 260 additions & 19 deletions

File tree

api/src/main/java/org/apache/cloudstack/api/command/user/vmsnapshot/CreateVMSnapshotCmd.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
import com.cloud.vm.VirtualMachine;
3838
import com.cloud.vm.snapshot.VMSnapshot;
3939

40-
@APICommand(name = "createVMSnapshot", description = "Creates Snapshot for an Instance.", responseObject = VMSnapshotResponse.class, since = "4.2.0", entityType = {VMSnapshot.class},
40+
@APICommand(name = "createVMSnapshot", description = "Creates Snapshot for an Instance. Running KVM UEFI disk-only snapshots briefly suspend the Instance while copying NVRAM state.",
41+
responseObject = VMSnapshotResponse.class, since = "4.2.0", entityType = {VMSnapshot.class},
4142
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false)
4243
public class CreateVMSnapshotCmd extends BaseAsyncCreateCmd {
4344

engine/storage/snapshot/src/main/java/org/apache/cloudstack/storage/vmsnapshot/KvmFileBasedStorageVmSnapshotStrategy.java

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import com.cloud.event.UsageEventUtils;
3535
import com.cloud.host.DetailVO;
3636
import com.cloud.host.Host;
37+
import com.cloud.host.HostVO;
3738
import com.cloud.host.dao.HostDetailsDao;
3839
import com.cloud.hypervisor.Hypervisor;
3940
import com.cloud.storage.DataStoreRole;
@@ -134,7 +135,7 @@ public boolean deleteVMSnapshot(VMSnapshot vmSnapshot) {
134135
logger.info("Starting VM snapshot delete process for snapshot [{}].", vmSnapshot.getUuid());
135136
UserVmVO userVm = userVmDao.findById(vmSnapshot.getVmId());
136137
VMSnapshotVO vmSnapshotBeingDeleted = (VMSnapshotVO) vmSnapshot;
137-
Long hostId = vmSnapshotHelper.pickRunningHost(vmSnapshotBeingDeleted.getVmId());
138+
Long hostId = pickHostForNvramSidecarCleanup(vmSnapshotBeingDeleted, userVm, "delete");
138139
validateHostSupportsNvramSidecarCleanup(vmSnapshotBeingDeleted, hostId, "delete");
139140
long virtualSize = 0;
140141
boolean isCurrent = vmSnapshotBeingDeleted.getCurrent();
@@ -197,7 +198,7 @@ public boolean revertVMSnapshot(VMSnapshot vmSnapshot) {
197198
}
198199

199200
VMSnapshotVO vmSnapshotBeingReverted = (VMSnapshotVO) vmSnapshot;
200-
Long hostId = vmSnapshotHelper.pickRunningHost(vmSnapshotBeingReverted.getVmId());
201+
Long hostId = pickHostForUefiNvramAwareDiskOnlySnapshot(userVm, "revert");
201202
validateHostSupportsUefiNvramAwareDiskOnlySnapshots(hostId, userVm, "revert");
202203

203204
transitStateWithoutThrow(vmSnapshotBeingReverted, VMSnapshot.Event.RevertRequested);
@@ -229,7 +230,9 @@ public boolean revertVMSnapshot(VMSnapshot vmSnapshot) {
229230
publishUsageEvent(EventTypes.EVENT_VM_SNAPSHOT_REVERT, vmSnapshotBeingReverted, userVm, volumeObjectTo);
230231
}
231232

232-
if (isUefiVm(userVm)) {
233+
if (isUefiVm(userVm) && !Objects.equals(userVm.getLastHostId(), hostId)) {
234+
logger.debug("Updating last host of UEFI VM [{}] to [{}] after disk-only snapshot revert because the NVRAM state was restored on that host.",
235+
userVm.getUuid(), hostId);
233236
userVm.setLastHostId(hostId);
234237
userVmDao.update(userVm.getId(), userVm);
235238
}
@@ -409,7 +412,6 @@ protected void deleteNvramSnapshotIfNeeded(VMSnapshotVO vmSnapshotVO, Long hostI
409412
return;
410413
}
411414

412-
validateHostSupportsNvramSidecarCleanup(vmSnapshotVO, hostId, "delete");
413415
DeleteDiskOnlyVmSnapshotCommand deleteSnapshotCommand = new DeleteDiskOnlyVmSnapshotCommand(List.of(), nvramSnapshotPath, primaryDataStore);
414416
Answer answer = agentMgr.easySend(hostId, deleteSnapshotCommand);
415417
if (answer == null || !answer.getResult()) {
@@ -520,7 +522,7 @@ protected VMSnapshot takeVmSnapshotInternal(VMSnapshot vmSnapshot, Map<VolumeInf
520522

521523
logger.info("Starting disk-only VM snapshot process for VM [{}].", userVm.getUuid());
522524

523-
Long hostId = vmSnapshotHelper.pickRunningHost(vmSnapshot.getVmId());
525+
Long hostId = pickHostForUefiNvramAwareDiskOnlySnapshot(userVm, "create");
524526
validateHostSupportsUefiNvramAwareDiskOnlySnapshots(hostId, userVm, "create");
525527
VMSnapshotVO vmSnapshotVO = (VMSnapshotVO) vmSnapshot;
526528
List<VolumeObjectTO> volumeTOs = vmSnapshotHelper.getVolumeTOList(userVm.getId());
@@ -760,6 +762,97 @@ protected String getNvramSnapshotPath(VMSnapshotVO vmSnapshot) {
760762
return nvramDetail != null ? nvramDetail.getValue() : null;
761763
}
762764

765+
protected Long pickHostForUefiNvramAwareDiskOnlySnapshot(UserVm userVm, String operation) {
766+
Long selectedHostId = vmSnapshotHelper.pickRunningHost(userVm.getId());
767+
if (!isUefiVm(userVm)) {
768+
return selectedHostId;
769+
}
770+
771+
boolean isCreate = "create".equals(operation);
772+
if (isCreate) {
773+
validateUefiSnapshotCreateHostOwnsActiveNvram(userVm, selectedHostId);
774+
}
775+
776+
return pickHostWithRequiredCapabilities(userVm, selectedHostId, operation, !isCreate,
777+
List.of(Host.HOST_UEFI_ENABLE, Host.HOST_KVM_DISK_ONLY_VM_SNAPSHOT_NVRAM));
778+
}
779+
780+
protected void validateUefiSnapshotCreateHostOwnsActiveNvram(UserVm userVm, Long selectedHostId) {
781+
if (VirtualMachine.State.Running.equals(userVm.getState())) {
782+
return;
783+
}
784+
785+
Long lastHostId = userVm.getLastHostId();
786+
if (lastHostId == null || !Objects.equals(lastHostId, selectedHostId)) {
787+
throw new CloudRuntimeException(String.format("Cannot create a disk-only snapshot for stopped UEFI VM [%s] on host [%s] because the active NVRAM "
788+
+ "state is expected on last host [%s]. Make the last host available or start the VM on a UEFI-capable KVM host before retrying.",
789+
userVm.getUuid(), selectedHostId, lastHostId));
790+
}
791+
}
792+
793+
protected Long pickHostForNvramSidecarCleanup(VMSnapshotVO vmSnapshotVO, UserVm userVm, String operation) {
794+
Long selectedHostId = vmSnapshotHelper.pickRunningHost(vmSnapshotVO.getVmId());
795+
if (StringUtils.isBlank(getNvramSnapshotPath(vmSnapshotVO))) {
796+
return selectedHostId;
797+
}
798+
799+
return pickHostWithRequiredCapabilities(userVm, selectedHostId, operation, true, List.of(Host.HOST_KVM_DISK_ONLY_VM_SNAPSHOT_NVRAM));
800+
}
801+
802+
protected Long pickHostWithRequiredCapabilities(UserVm userVm, Long selectedHostId, String operation, boolean canFallbackFromSelectedHost,
803+
List<String> requiredCapabilities) {
804+
if (hostSupportsCapabilities(selectedHostId, requiredCapabilities)) {
805+
return selectedHostId;
806+
}
807+
808+
if (VirtualMachine.State.Running.equals(userVm.getState()) || !canFallbackFromSelectedHost) {
809+
return selectedHostId;
810+
}
811+
812+
return listCandidateHostsForVmSnapshot(userVm).stream()
813+
.filter(host -> hostSupportsCapabilities(host.getId(), requiredCapabilities))
814+
.findFirst()
815+
.map(HostVO::getId)
816+
.orElseThrow(() -> new CloudRuntimeException(String.format("Cannot %s disk-only snapshot state for VM [%s] because no Up and Enabled host in the "
817+
+ "VM storage scope advertises [%s].", operation, userVm.getUuid(), String.join(", ", requiredCapabilities))));
818+
}
819+
820+
protected List<HostVO> listCandidateHostsForVmSnapshot(UserVm userVm) {
821+
List<VolumeVO> volumes = volumeDao.findByInstance(userVm.getId());
822+
if (CollectionUtils.isEmpty(volumes)) {
823+
throw new CloudRuntimeException(String.format("Cannot find a host for VM snapshot operation because VM [%s] has no volumes.", userVm.getUuid()));
824+
}
825+
826+
VolumeVO volume = volumes.stream()
827+
.filter(volumeVO -> Volume.Type.ROOT.equals(volumeVO.getVolumeType()))
828+
.findFirst()
829+
.orElse(volumes.get(0));
830+
Long poolId = volume.getPoolId();
831+
if (poolId == null) {
832+
throw new CloudRuntimeException(String.format("Cannot find a host for VM snapshot operation because volume [%s] has no pool.", volume.getUuid()));
833+
}
834+
835+
StoragePoolVO storagePoolVO = storagePool.findById(poolId);
836+
if (storagePoolVO == null) {
837+
throw new CloudRuntimeException(String.format("Cannot find a host for VM snapshot operation because storage pool [%s] was not found.", poolId));
838+
}
839+
840+
List<HostVO> hosts = hostDao.listAllUpAndEnabledNonHAHosts(Host.Type.Routing, storagePoolVO.getClusterId(), storagePoolVO.getPodId(),
841+
storagePoolVO.getDataCenterId(), null);
842+
if (CollectionUtils.isEmpty(hosts)) {
843+
throw new CloudRuntimeException(String.format("Cannot find a host for VM snapshot operation because no Up and Enabled host was found in storage pool [%s] scope.",
844+
storagePoolVO.getUuid()));
845+
}
846+
return hosts;
847+
}
848+
849+
protected boolean hostSupportsCapabilities(Long hostId, List<String> requiredCapabilities) {
850+
if (hostId == null || CollectionUtils.isEmpty(requiredCapabilities)) {
851+
return false;
852+
}
853+
return requiredCapabilities.stream().allMatch(capability -> isHostCapabilityEnabled(hostId, capability));
854+
}
855+
763856
protected void validateHostSupportsUefiNvramAwareDiskOnlySnapshots(Long hostId, UserVm userVm, String operation) {
764857
if (!isUefiVm(userVm)) {
765858
return;

0 commit comments

Comments
 (0)