1 change: 1 addition & 0 deletions Cargo.lock
@@ -10042,6 +10042,7 @@ dependencies = [
"get_resources",
"guid",
"hyperv_ic_resources",
"inspect",
"jiff",
"kmsg",
"memory_range",
47 changes: 22 additions & 25 deletions vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
@@ -1308,32 +1308,29 @@ impl<D: DeviceBacking> AsyncRun<WorkerState> for DriverWorkerTask<D> {
         stop: &mut task_control::StopTask<'_>,
         state: &mut WorkerState,
     ) -> Result<(), task_control::Cancelled> {
-        let r = stop
-            .until_stopped(async {
-                loop {
-                    match self.recv.next().await {
-                        Some(NvmeWorkerRequest::CreateIssuer(rpc)) => {
-                            rpc.handle(async |cpu| self.create_io_issuer(state, cpu).await)
-                                .await
-                        }
-                        Some(NvmeWorkerRequest::Save(rpc)) => {
-                            rpc.handle(async |span| {
-                                let child_span = tracing::info_span!(
-                                    parent: &span,
-                                    "nvme_worker_save",
-                                    pci_id = %self.device.id()
-                                );
-                                self.save(state).instrument(child_span).await
-                            })
-                            .await
-                        }
-                        None => break,
-                    }
-                }
-            })
-            .await;
-        tracing::info!(pci_id = %self.device.id(), "nvme worker task exiting");
-        r
+        loop {
+            let cmd = stop.until_stopped(self.recv.next()).await?;
+            match cmd {
+                Some(NvmeWorkerRequest::CreateIssuer(rpc)) => {
+                    rpc.handle(async |cpu| self.create_io_issuer(state, cpu).await)
+                        .await
+                }
+                Some(NvmeWorkerRequest::Save(rpc)) => {
+                    rpc.handle(async |span| {
+                        let child_span = tracing::info_span!(
+                            parent: &span,
+                            "nvme_worker_save",
+                            pci_id = %self.device.id()
+                        );
+                        self.save(state).instrument(child_span).await
+                    })
+                    .await
+                }
+                None => break,
+            }
+        }
+        tracing::info!(pci_id = %self.device.id(), "nvme worker task exiting cleanly");
+        Ok(())
     }
 }

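Why the restructure matters: in the old code, stop.until_stopped wrapped the entire receive-and-handle loop, so a stop request (for example, one raised while the device was being inspected) could cancel and drop an in-flight create_io_issuer future after its admin command had already been submitted. The new code scopes until_stopped to just the channel receive; once a request is dequeued, its handler runs to completion. Below is a minimal sketch of the difference, using tokio channels and select! in place of the repo's task_control and mesh RPC types; all names and timings are illustrative assumptions, not OpenVMM code.

use std::time::Duration;

use tokio::sync::mpsc;
use tokio::time::sleep;

// Stand-in for handling NvmeWorkerRequest::CreateIssuer: it submits an admin
// command and must not be dropped once that command is in flight.
async fn handle(req: u32) {
    sleep(Duration::from_millis(100)).await; // pretend the device is slow
    println!("request {req} fully handled");
}

#[tokio::main]
async fn main() {
    let (req_tx, mut req_rx) = mpsc::channel::<u32>(4);
    let (stop_tx, mut stop_rx) = mpsc::channel::<()>(1);

    req_tx.send(1).await.unwrap();
    tokio::spawn(async move {
        sleep(Duration::from_millis(10)).await; // stop arrives mid-handling
        let _ = stop_tx.send(()).await;
    });

    loop {
        // New shape: only the receive is cancellable. The old shape put
        // `handle(req).await` inside the cancellable future too, so a stop
        // could drop it with the admin command still pending.
        let req = tokio::select! {
            r = req_rx.recv() => r,
            _ = stop_rx.recv() => break,
        };
        match req {
            Some(req) => handle(req).await, // always runs to completion
            None => break,
        }
    }
    println!("worker exiting cleanly");
}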
1 change: 1 addition & 0 deletions vmm_tests/vmm_tests/Cargo.toml
@@ -23,6 +23,7 @@ tmk_tests.workspace = true

petri_artifacts_common.workspace = true
petri.workspace = true
inspect.workspace = true

get_resources.workspace = true
openvmm_defs.workspace = true
145 changes: 145 additions & 0 deletions vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -1169,6 +1169,151 @@ async fn servicing_keepalive_slow_create_io_queue(
Ok(())
}

/// Verifies that save works correctly when a create_io_queue command
/// is still in flight and inspect is called on the device. Previously,
/// inspect calls could inadvertently drop in-flight create_io_issuer futures,
/// after which save was serviced with CREATE_IO_COMPLETION_QUEUE commands
/// still pending.
#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
async fn servicing_keepalive_slow_create_io_queue_with_inspect(
config: PetriVmBuilder<OpenVmmPetriBackend>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> Result<(), anyhow::Error> {
const QUEUE_CREATION_DELAY: Duration = Duration::from_secs(60);
const TRIGGER_CREATE_IO_QUEUE_TIMEOUT: Duration = Duration::from_secs(5);
const TOTAL_SAVE_TIMEOUT: Duration = Duration::from_secs(15);

let mut flags = config.default_servicing_flags();
flags.enable_nvme_keepalive = true;
let mut fault_start_updater = CellUpdater::new(false);

let fault_configuration = FaultConfiguration::new(fault_start_updater.cell())
.with_admin_queue_fault(
AdminQueueFaultConfig::new().with_submission_queue_fault(
CommandMatchBuilder::new()
.match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
.build(),
AdminQueueFaultBehavior::Delay(QUEUE_CREATION_DELAY),
),
);

let scsi_controller_guid = Guid::new_random();
let disk_size = 4 * 1024 * 1024; // 4 MiB — enough for dd reads
let vp_count = 4;

let (mut vm, agent) = create_keepalive_test_config_custom_vps(
config,
fault_configuration,
VTL0_NVME_LUN,
scsi_controller_guid,
disk_size,
vp_count,
)
.await?;
agent.ping().await?;

let cpus_with_issuers = find_cpus_with_io_issuers(&vm).await?;
let target_cpu = (0u32..vp_count)
.find(|cpu| !cpus_with_issuers.contains(cpu))
.unwrap_or_else(|| {
panic!(
"all {vp_count} CPUs already have IO issuers after boot — \
test cannot exercise create_io_queue. Consider increasing vp_count."
)
});
tracing::info!(
target_cpu,
existing_issuers = ?cpus_with_issuers,
"selected target CPU with no IO issuer"
);

// Resolve the disk path before save. The device might appear as /dev/sda
// or /dev/sdb depending on timing.
let device_paths = get_device_paths(
&agent,
scsi_controller_guid,
vec![ExpectedGuestDevice {
lun: VTL0_NVME_LUN,
disk_size_sectors: (disk_size / SCSI_SECTOR_SIZE) as usize,
friendly_name: "nvme_disk".to_string(),
}],
)
.await?;
assert_eq!(device_paths.len(), 1);
let disk_path = &device_paths[0];

// DEV NOTE: `run_cpu_pinned_io` only needs to run for long enough to
// guarantee that the create_io_queue command gets stuck. Ideally this would
// be event driven rather than time driven, but the infrastructure for that
// is not in place yet.
// Even though the dd command will time out, the run loop will be stuck until
// the create_io_queue command completes.
fault_start_updater.set(true).await;
let io_result = CancelContext::new()
.with_timeout(TRIGGER_CREATE_IO_QUEUE_TIMEOUT)
.until_cancelled(run_cpu_pinned_io(&agent, disk_path, target_cpu))
.await;

assert!(
io_result.is_err(),
"IO command should have timed out. This likely means the create_io_queue command did not get injected correctly."
);

// In previous versions, invoking inspect would cause the DriverWorkerTask to
// drop the stuck create_io_queue command and service the save with admin
// commands still pending (not good).
let nvme_device_inspect = vm.inspect_openhcl("vm/nvme/devices", None, None).await?;
tracing::info!(nvme_device_inspect = ?nvme_device_inspect, "nvme device inspected");

let entries = match &nvme_device_inspect {
inspect::Node::Dir(entries) => entries,
_ => panic!(
"expected dir for 'vm/nvme/devices' but found {}",
nvme_device_inspect.json()
),
};
assert_eq!(
entries.len(),
1,
"expected exactly 1 NVMe device under 'vm/nvme/devices', found {}",
entries.len()
);
let nvme_device_name = entries[0].name.clone();

CancelContext::new()
.with_timeout(TOTAL_SAVE_TIMEOUT)
.until_cancelled(vm.save_openhcl(igvm_file.clone(), flags))
.await
.expect("VM save did not complete within the given timeout, even though it should have. Save is stuck when draining after restore with slow create_io_queue.")
.expect("Save failed");

fault_start_updater.set(false).await;

vm.restore_openhcl().await?;
agent.ping().await?;

let vm_inspect = vm
.inspect_openhcl(
&format!("vm/nvme/devices/{nvme_device_name}/driver/driver/admin/commands/commands"),
None,
None,
)
.await?;

tracing::info!("vm inspected {}", vm_inspect.json());
let entries = match vm_inspect {
inspect::Node::Dir(entries) => entries,
_ => {
panic!(
"expected list of pending commands but found {}",
vm_inspect.json()
);
}
};
assert_eq!(entries.len(), 1, "expected only 1 entry, the AER command");

Ok(())
}

async fn apply_fault_with_keepalive(
config: PetriVmBuilder<OpenVmmPetriBackend>,
fault_configuration: FaultConfiguration,