Skip to content

Commit 1d8a7cf

Browse files
author
Fabrício Duarte
committed
Merge branch 'fix-42006' into '4.20.0.0-scclouds'
Correção de _deadlock_ na migração de recursos entre armazenamentos secundários Closes #3195 See merge request scclouds/scclouds!1329
2 parents 9a723c3 + 16f2119 commit 1d8a7cf

3 files changed

Lines changed: 87 additions & 25 deletions

File tree

engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/DataMigrationUtility.java

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -273,12 +273,22 @@ public int compare(DataObject o1, DataObject o2) {
273273

274274
protected List<DataObject> getAllReadyTemplates(DataStore srcDataStore, Map<DataObject, Pair<List<TemplateInfo>, Long>> childTemplates, List<TemplateDataStoreVO> templates) {
275275
List<TemplateInfo> files = new LinkedList<>();
276+
Set<Long> idsForMigration = new HashSet<>();
277+
276278
for (TemplateDataStoreVO template : templates) {
277-
VMTemplateVO templateVO = templateDao.findById(template.getTemplateId());
278-
if (shouldMigrateTemplate(template, templateVO)) {
279-
files.add(templateFactory.getTemplate(template.getTemplateId(), srcDataStore));
279+
long templateId = template.getTemplateId();
280+
if (idsForMigration.contains(templateId)) {
281+
logger.warn("Template store reference [{}] is duplicated; not considering it for migration.", template);
282+
continue;
283+
}
284+
VMTemplateVO templateVO = templateDao.findById(templateId);
285+
if (!shouldMigrateTemplate(template, templateVO)) {
286+
continue;
280287
}
288+
files.add(templateFactory.getTemplate(template.getTemplateId(), srcDataStore));
289+
idsForMigration.add(templateId);
281290
}
291+
282292
for (TemplateInfo template: files) {
283293
List<VMTemplateVO> children = templateDao.listByParentTemplatetId(template.getId());
284294
List<TemplateInfo> temps = new ArrayList<>();
@@ -288,6 +298,7 @@ protected List<DataObject> getAllReadyTemplates(DataStore srcDataStore, Map<Data
288298
}
289299
childTemplates.put(template, new Pair<>(temps, getTotalChainSize(temps)));
290300
}
301+
291302
return (List<DataObject>) (List<?>) files;
292303
}
293304

@@ -382,13 +393,29 @@ protected Long getTotalChainSize(List<? extends DataObject> chain) {
382393

383394
protected List<DataObject> getAllReadyVolumes(DataStore srcDataStore, List<VolumeDataStoreVO> volumes) {
384395
List<DataObject> files = new LinkedList<>();
396+
Set<Long> idsForMigration = new HashSet<>();
397+
385398
for (VolumeDataStoreVO volume : volumes) {
386-
if (volume.getState() == ObjectInDataStoreStateMachine.State.Ready) {
387-
VolumeInfo volumeInfo = volumeFactory.getVolume(volume.getVolumeId(), srcDataStore);
388-
if (volumeInfo != null && volumeInfo.getHypervisorType() != Hypervisor.HypervisorType.Simulator) {
389-
files.add(volumeInfo);
390-
}
399+
long volumeId = volume.getVolumeId();
400+
if (idsForMigration.contains(volumeId)) {
401+
logger.warn("Volume store reference [{}] is duplicated; not considering it for migration.", volume);
402+
continue;
403+
}
404+
if (volume.getState() != ObjectInDataStoreStateMachine.State.Ready) {
405+
logger.debug("Not migrating volume [{}] because its state is not ready.", volume);
406+
continue;
407+
}
408+
VolumeInfo volumeInfo = volumeFactory.getVolume(volume.getVolumeId(), srcDataStore);
409+
if (volumeInfo == null) {
410+
logger.debug("Not migrating volume [{}] because we could not get its information.", volume);
411+
continue;
412+
}
413+
if (volumeInfo.getHypervisorType() == Hypervisor.HypervisorType.Simulator) {
414+
logger.debug("Not migrating volume [{}] because its hypervisor type is simulator.", volume);
415+
continue;
391416
}
417+
files.add(volumeInfo);
418+
idsForMigration.add(volumeId);
392419
}
393420
return files;
394421
}

engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/StorageOrchestrator.java

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,12 @@
9393
import com.cloud.utils.Pair;
9494
import com.cloud.utils.component.ManagerBase;
9595
import com.cloud.utils.exception.CloudRuntimeException;
96+
import org.apache.logging.log4j.ThreadContext;
9697

9798
public class StorageOrchestrator extends ManagerBase implements StorageOrchestrationService, Configurable {
9899

100+
private static final String LOGCONTEXTID = "logcontextid";
101+
99102
@Inject
100103
SnapshotDataStoreDao snapshotDataStoreDao;
101104
@Inject
@@ -137,6 +140,8 @@ public class StorageOrchestrator extends ManagerBase implements StorageOrchestra
137140
Integer numConcurrentCopyTasksPerSSVM = 2;
138141

139142
private final Map<Long, ThreadPoolExecutor> zoneExecutorMap = new ConcurrentHashMap<>();
143+
private final Map<Long, Integer> zonePendingWorkCountMap = new ConcurrentHashMap<>();
144+
140145
private final Map<Long, ExecutorService> zoneKvmIncrementalExecutorMap = new ConcurrentHashMap<>();
141146

142147
@Override
@@ -513,12 +518,18 @@ private void createMigrateDataTask(DataObject chosenFileForMigration, Map<DataOb
513518
}
514519

515520
protected <T> Future<T> submit(Long zoneId, Callable<T> task) {
516-
if (!zoneExecutorMap.containsKey(zoneId)) {
517-
zoneExecutorMap.put(zoneId, new ThreadPoolExecutor(numConcurrentCopyTasksPerSSVM, numConcurrentCopyTasksPerSSVM,
518-
30, TimeUnit.MINUTES, new MigrateBlockingQueue<>(numConcurrentCopyTasksPerSSVM)));
521+
ThreadPoolExecutor executor;
522+
synchronized (this) {
523+
if (!zoneExecutorMap.containsKey(zoneId)) {
524+
zoneExecutorMap.put(zoneId, new ThreadPoolExecutor(numConcurrentCopyTasksPerSSVM, numConcurrentCopyTasksPerSSVM,
525+
30, TimeUnit.MINUTES, new MigrateBlockingQueue<>(numConcurrentCopyTasksPerSSVM)));
526+
zonePendingWorkCountMap.put(zoneId, 0);
527+
}
528+
zonePendingWorkCountMap.merge(zoneId, 1, Integer::sum);
529+
scaleExecutorIfNecessary(zoneId);
530+
executor = zoneExecutorMap.get(zoneId);
519531
}
520-
scaleExecutorIfNecessary(zoneId);
521-
return zoneExecutorMap.get(zoneId).submit(task);
532+
return executor.submit(task);
522533
}
523534

524535
protected <T> Future<T> submitKvmIncrementalMigration(Long zoneId, Callable<T> task) {
@@ -540,24 +551,23 @@ protected void scaleExecutorIfNecessary(Long zoneId) {
540551
}
541552
}
542553

543-
protected void tryCleaningUpExecutor(Long zoneId) {
554+
protected synchronized void tryCleaningUpExecutor(Long zoneId) {
544555
if (!zoneExecutorMap.containsKey(zoneId)) {
545556
logger.debug("No executor exists for zone [{}].", zoneId);
546557
return;
547558
}
548559

549-
ThreadPoolExecutor executor = zoneExecutorMap.get(zoneId);
550-
synchronized (executor) {
551-
int activeTasks = executor.getActiveCount();
552-
if (activeTasks > 1) {
553-
logger.debug("Not cleaning executor of zone [{}] yet, as there are [{}] active tasks.", zoneId, activeTasks);
554-
return;
555-
}
556-
557-
logger.debug("Cleaning executor of zone [{}].", zoneId);
558-
zoneExecutorMap.remove(zoneId);
559-
executor.shutdown();
560+
zonePendingWorkCountMap.merge(zoneId, -1, Integer::sum);
561+
Integer pendingWorkCount = zonePendingWorkCountMap.get(zoneId);
562+
if (pendingWorkCount > 0) {
563+
logger.debug("Not cleaning executor of zone [{}] yet, as there is [{}] pending work.", zoneId, pendingWorkCount);
564+
return;
560565
}
566+
567+
logger.debug("Cleaning executor of zone [{}].", zoneId);
568+
ThreadPoolExecutor executor = zoneExecutorMap.get(zoneId);
569+
zoneExecutorMap.remove(zoneId);
570+
executor.shutdown();
561571
}
562572

563573
private MigrationResponse handleResponse(List<Future<DataObjectResult>> futures, MigrationPolicy migrationPolicy, String message, boolean success) {
@@ -733,10 +743,13 @@ private class MigrateDataTask implements Callable<DataObjectResult> {
733743
private DataStore destDataStore;
734744
private Map<DataObject, Pair<List<SnapshotInfo>, Long>> snapshotChain;
735745
private Map<DataObject, Pair<List<TemplateInfo>, Long>> templateChain;
746+
private String logid;
747+
736748
public MigrateDataTask(DataObject file, DataStore srcDataStore, DataStore destDataStore) {
737749
this.file = file;
738750
this.srcDataStore = srcDataStore;
739751
this.destDataStore = destDataStore;
752+
this.logid = ThreadContext.get(LOGCONTEXTID);
740753
}
741754

742755
public void setSnapshotChains(Map<DataObject, Pair<List<SnapshotInfo>, Long>> snapshotChain) {
@@ -758,6 +771,8 @@ public DataObject getFile() {
758771

759772
@Override
760773
public DataObjectResult call() {
774+
ThreadContext.put(LOGCONTEXTID, logid);
775+
761776
DataObjectResult result;
762777
AsyncCallFuture<DataObjectResult> future = secStgSrv.migrateData(file, srcDataStore, destDataStore, snapshotChain, templateChain);
763778
try {
@@ -767,22 +782,29 @@ public DataObjectResult call() {
767782
result = new DataObjectResult(file);
768783
result.setResult(e.toString());
769784
}
785+
770786
tryCleaningUpExecutor(srcDataStore.getScope().getScopeId());
787+
ThreadContext.clearAll();
788+
771789
return result;
772790
}
773791
}
774792

775793
private class CopyTemplateTask implements Callable<TemplateApiResult> {
776794
private TemplateInfo sourceTmpl;
777795
private DataStore destStore;
796+
private String logid;
778797

779798
public CopyTemplateTask(TemplateInfo sourceTmpl, DataStore destStore) {
780799
this.sourceTmpl = sourceTmpl;
781800
this.destStore = destStore;
801+
this.logid = ThreadContext.get(LOGCONTEXTID);
782802
}
783803

784804
@Override
785805
public TemplateApiResult call() {
806+
ThreadContext.put(LOGCONTEXTID, logid);
807+
786808
TemplateApiResult result;
787809
AsyncCallFuture<TemplateApiResult> future = templateService.copyTemplateToImageStore(sourceTmpl, destStore);
788810
try {
@@ -793,7 +815,10 @@ public TemplateApiResult call() {
793815
result = new TemplateApiResult(sourceTmpl);
794816
result.setResult(e.getMessage());
795817
}
818+
796819
tryCleaningUpExecutor(destStore.getScope().getScopeId());
820+
ThreadContext.clearAll();
821+
797822
return result;
798823
}
799824
}

engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/TemplateServiceImpl.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import org.apache.cloudstack.framework.messagebus.MessageBus;
5959
import org.apache.cloudstack.framework.messagebus.PublishScope;
6060
import org.apache.cloudstack.storage.command.CommandResult;
61+
import org.apache.cloudstack.storage.command.CopyCmdAnswer;
6162
import org.apache.cloudstack.storage.command.DeleteCommand;
6263
import org.apache.cloudstack.storage.datastore.DataObjectManager;
6364
import org.apache.cloudstack.storage.datastore.ObjectInDataStoreManager;
@@ -677,6 +678,15 @@ public AsyncCallFuture<TemplateApiResult> copyTemplateToImageStore(DataObject so
677678
TemplateOpContext<TemplateApiResult> context = new TemplateOpContext<>(null, destTmpl, future);
678679
AsyncCallbackDispatcher<TemplateServiceImpl, CopyCommandResult> caller = AsyncCallbackDispatcher.create(this);
679680
caller.setCallback(caller.getTarget().copyTemplateToImageStoreCallback(null, null)).setContext(context);
681+
682+
if (source.getDataStore().getId() == destStore.getId()) {
683+
logger.debug("Destination image store [{}] is the same as the origin; returning success.");
684+
CopyCmdAnswer answer = new CopyCmdAnswer(source.getTO());
685+
CopyCommandResult result = new CopyCommandResult("", answer);
686+
caller.complete(result);
687+
return future;
688+
}
689+
680690
_motionSrv.copyAsync(sourceTmpl, destTmpl, caller);
681691
return future;
682692
}

0 commit comments

Comments
 (0)