1 change: 1 addition & 0 deletions Cargo.lock
@@ -10042,6 +10042,7 @@ dependencies = [
"get_resources",
"guid",
"hyperv_ic_resources",
"inspect",
"jiff",
"kmsg",
"memory_range",
47 changes: 22 additions & 25 deletions vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
@@ -1308,32 +1308,29 @@ impl<D: DeviceBacking> AsyncRun<WorkerState> for DriverWorkerTask<D> {
         stop: &mut task_control::StopTask<'_>,
         state: &mut WorkerState,
     ) -> Result<(), task_control::Cancelled> {
-        let r = stop
-            .until_stopped(async {
-                loop {
-                    match self.recv.next().await {
-                        Some(NvmeWorkerRequest::CreateIssuer(rpc)) => {
-                            rpc.handle(async |cpu| self.create_io_issuer(state, cpu).await)
-                                .await
-                        }
-                        Some(NvmeWorkerRequest::Save(rpc)) => {
-                            rpc.handle(async |span| {
-                                let child_span = tracing::info_span!(
-                                    parent: &span,
-                                    "nvme_worker_save",
-                                    pci_id = %self.device.id()
-                                );
-                                self.save(state).instrument(child_span).await
-                            })
-                            .await
-                        }
-                        None => break,
-                    }
-                }
-            })
-            .await;
-        tracing::info!(pci_id = %self.device.id(), "nvme worker task exiting");
-        r
+        loop {
+            let cmd = stop.until_stopped(self.recv.next()).await?;
+            match cmd {
+                Some(NvmeWorkerRequest::CreateIssuer(rpc)) => {
+                    rpc.handle(async |cpu| self.create_io_issuer(state, cpu).await)
+                        .await
+                }
+                Some(NvmeWorkerRequest::Save(rpc)) => {
+                    rpc.handle(async |span| {
+                        let child_span = tracing::info_span!(
+                            parent: &span,
+                            "nvme_worker_save",
+                            pci_id = %self.device.id()
+                        );
+                        self.save(state).instrument(child_span).await
+                    })
+                    .await
+                }
+                None => break,
+            }
+        }
+        tracing::info!(pci_id = %self.device.id(), "nvme worker task exiting cleanly");
+        Ok(())
     }
 }

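Why the restructure matters: in the old code, stop.until_stopped wrapped the entire receive-and-handle loop, so a stop request (for example, one raised while the device was being inspected) could cancel and drop an in-flight create_io_issuer future after its admin command had already been submitted. The new code scopes until_stopped to just the channel receive; once a request is dequeued, its handler runs to completion. Below is a minimal sketch of the difference, using tokio channels and select! in place of the repo's task_control and mesh RPC types; all names and timings are illustrative assumptions, not OpenVMM code.

use std::time::Duration;

use tokio::sync::mpsc;
use tokio::time::sleep;

// Stand-in for handling NvmeWorkerRequest::CreateIssuer: it submits an admin
// command and must not be dropped once that command is in flight.
async fn handle(req: u32) {
    sleep(Duration::from_millis(100)).await; // pretend the device is slow
    println!("request {req} fully handled");
}

#[tokio::main]
async fn main() {
    let (req_tx, mut req_rx) = mpsc::channel::<u32>(4);
    let (stop_tx, mut stop_rx) = mpsc::channel::<()>(1);

    req_tx.send(1).await.unwrap();
    tokio::spawn(async move {
        sleep(Duration::from_millis(10)).await; // stop arrives mid-handling
        let _ = stop_tx.send(()).await;
    });

    loop {
        // New shape: only the receive is cancellable. The old shape put
        // `handle(req).await` inside the cancellable future too, so a stop
        // could drop it with the admin command still pending.
        let req = tokio::select! {
            r = req_rx.recv() => r,
            _ = stop_rx.recv() => break,
        };
        match req {
            Some(req) => handle(req).await, // always runs to completion
            None => break,
        }
    }
    println!("worker exiting cleanly");
}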
1 change: 1 addition & 0 deletions vmm_tests/vmm_tests/Cargo.toml
@@ -23,6 +23,7 @@ tmk_tests.workspace = true

petri_artifacts_common.workspace = true
petri.workspace = true
inspect.workspace = true

get_resources.workspace = true
openvmm_defs.workspace = true
145 changes: 145 additions & 0 deletions vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -1169,6 +1169,151 @@ async fn servicing_keepalive_slow_create_io_queue(
Ok(())
}

/// Verifies that save works correctly when a create_io_queue command
/// is still in flight and inspect is called on the device. Previously,
/// inspect calls could inadvertently drop in-flight create_io_issuer futures,
/// after which save was serviced with CREATE_IO_COMPLETION_QUEUE commands
/// still pending.
#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
async fn servicing_keepalive_slow_create_io_queue_with_inspect(
config: PetriVmBuilder<OpenVmmPetriBackend>,
(igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
) -> Result<(), anyhow::Error> {
const QUEUE_CREATION_DELAY: Duration = Duration::from_secs(60);
const TRIGGER_CREATE_IO_QUEUE_TIMEOUT: Duration = Duration::from_secs(5);
const TOTAL_SAVE_TIMEOUT: Duration = Duration::from_secs(15);

let mut flags = config.default_servicing_flags();
flags.enable_nvme_keepalive = true;
let mut fault_start_updater = CellUpdater::new(false);

let fault_configuration = FaultConfiguration::new(fault_start_updater.cell())
.with_admin_queue_fault(
AdminQueueFaultConfig::new().with_submission_queue_fault(
CommandMatchBuilder::new()
.match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
.build(),
AdminQueueFaultBehavior::Delay(QUEUE_CREATION_DELAY),
),
);

let scsi_controller_guid = Guid::new_random();
let disk_size = 4 * 1024 * 1024; // 4 MiB — enough for dd reads
let vp_count = 4;

let (mut vm, agent) = create_keepalive_test_config_custom_vps(
config,
fault_configuration,
VTL0_NVME_LUN,
scsi_controller_guid,
disk_size,
vp_count,
)
.await?;
agent.ping().await?;

let cpus_with_issuers = find_cpus_with_io_issuers(&vm).await?;
let target_cpu = (0u32..vp_count)
.find(|cpu| !cpus_with_issuers.contains(cpu))
.unwrap_or_else(|| {
panic!(
"all {vp_count} CPUs already have IO issuers after boot — \
test cannot exercise create_io_queue. Consider increasing vp_count."
)
});
tracing::info!(
target_cpu,
existing_issuers = ?cpus_with_issuers,
"selected target CPU with no IO issuer"
);

// Resolve the disk path before save. The device might appear as /dev/sda
// or /dev/sdb depending on timing.
let device_paths = get_device_paths(
&agent,
scsi_controller_guid,
vec![ExpectedGuestDevice {
lun: VTL0_NVME_LUN,
disk_size_sectors: (disk_size / SCSI_SECTOR_SIZE) as usize,
friendly_name: "nvme_disk".to_string(),
}],
)
.await?;
assert_eq!(device_paths.len(), 1);
let disk_path = &device_paths[0];

// DEV NOTE: `run_cpu_pinned_io` only needs to run for long enough to
// guarantee that the create_io_queue command gets stuck. Ideally this would
// be event driven rather than time driven, but the infrastructure for that
// is not in place yet.
// Even though the dd command will time out, the run loop will be stuck until
// the create_io_queue command completes.
fault_start_updater.set(true).await;
let io_result = CancelContext::new()
.with_timeout(TRIGGER_CREATE_IO_QUEUE_TIMEOUT)
.until_cancelled(run_cpu_pinned_io(&agent, disk_path, target_cpu))
.await;

assert!(
io_result.is_err(),
"IO command should have timed out. This likely means the create_io_queue command did not get injected correctly."
);

// In previous versions, invoking inspect would cause the DriverWorkerTask to
// drop the stuck create_io_queue command and service the save with admin
// commands still pending (not good).
let nvme_device_inspect = vm.inspect_openhcl("vm/nvme/devices", None, None).await?;
tracing::info!(nvme_device_inspect = ?nvme_device_inspect, "nvme device inspected");

let entries = match &nvme_device_inspect {
inspect::Node::Dir(entries) => entries,
_ => panic!(
"expected dir for 'vm/nvme/devices' but found {}",
nvme_device_inspect.json()
),
};
assert_eq!(
entries.len(),
1,
"expected exactly 1 NVMe device under 'vm/nvme/devices', found {}",
entries.len()
);
let nvme_device_name = entries[0].name.clone();

CancelContext::new()
.with_timeout(TOTAL_SAVE_TIMEOUT)
.until_cancelled(vm.save_openhcl(igvm_file.clone(), flags))
.await
.expect("VM save did not complete within the given timeout, even though it should have. Save is stuck when draining after restore with slow create_io_queue.")
.expect("Save failed");

fault_start_updater.set(false).await;

vm.restore_openhcl().await?;
agent.ping().await?;

let vm_inspect = vm
.inspect_openhcl(
&format!("vm/nvme/devices/{nvme_device_name}/driver/driver/admin/commands/commands"),
None,
None,
)
.await?;

tracing::info!("vm inspected {}", vm_inspect.json());
let entries = match vm_inspect {
inspect::Node::Dir(entries) => entries,
_ => {
panic!(
"expected list of pending commands but found {}",
vm_inspect.json()
);
}
};
assert_eq!(entries.len(), 1, "expected only 1 entry, the AER command");

Ok(())
}

async fn apply_fault_with_keepalive(
config: PetriVmBuilder<OpenVmmPetriBackend>,
fault_configuration: FaultConfiguration,