From 0a9f8715a1e5d5d4b0d840dd205ab6d74b696453 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Fri, 1 May 2026 14:46:36 -0700
Subject: [PATCH 01/17] Should enable keepalive but only for ASAP devices

---
 .../src/nvme_manager/manager.rs               | 41 ++++++++++++++++---
 .../underhill_core/src/nvme_manager/mod.rs    | 17 ++++++++
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index 1b59109fec..d901748574 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -5,6 +5,7 @@ use crate::nvme_manager::CreateNvmeDriver;
 use crate::nvme_manager::device::NvmeDriverManager;
 use crate::nvme_manager::device::NvmeDriverManagerClient;
 use crate::nvme_manager::device::NvmeDriverShutdownOptions;
+use crate::nvme_manager::is_nvme_keepalive_compatible;
 use crate::nvme_manager::save_restore::NvmeManagerSavedState;
 use crate::nvme_manager::save_restore::NvmeSavedDiskConfig;
 use crate::servicing::NvmeSavedState;
@@ -301,12 +302,27 @@ impl NvmeManagerWorker {
 
         async {
             join_all(devices_to_shutdown.into_iter().map(|(pci_id, driver)| {
+                // nvme_keepalive is received from host but it is only valid
+                // when memory pool allocator supports save/restore. Further,
+                // as a partial mitigation for known incompatibilities between
+                // keepalive and NVMe Direct v2 devices, we only honor
+                // keepalive for ASAP devices (identified by the VPCI
+                // instance GUID containing `c05b`).
+                let host_requested_keepalive =
+                    nvme_keepalive && self.context.save_restore_supported;
+                let device_keepalive =
+                    host_requested_keepalive && is_nvme_keepalive_compatible(&pci_id);
+                if host_requested_keepalive && !device_keepalive {
+                    tracing::info!(
+                        %pci_id,
+                        "disabling nvme keepalive for non-ASAP device; \
+                         falling back to reset-on-servicing"
+                    );
+                }
                 driver
                     .shutdown(NvmeDriverShutdownOptions {
-                        // nvme_keepalive is received from host but it is only valid
-                        // when memory pool allocator supports save/restore.
-                        do_not_reset: nvme_keepalive && self.context.save_restore_supported,
-                        skip_device_shutdown: nvme_keepalive && self.context.save_restore_supported,
+                        do_not_reset: device_keepalive,
+                        skip_device_shutdown: device_keepalive,
                     })
                     .instrument(tracing::info_span!("shutdown_nvme_driver", %pci_id))
             }))
@@ -419,12 +435,27 @@ impl NvmeManagerWorker {
     /// Saves NVMe device's states into buffer during servicing.
     pub async fn save(&mut self) -> anyhow::Result<NvmeManagerSavedState> {
         let mut nvme_disks: Vec<NvmeSavedDiskConfig> = Vec::new();
+        // Only save state for devices known to be keepalive-compatible.
+        // Non-ASAP (NVMe Direct v2) devices will be reset on servicing, so
+        // persisting their state would be a waste and could conflict with
+        // the reset path on restore.
         let mut devices_to_save: HashMap<String, NvmeDriverManagerClient> = self
             .context
             .devices
             .write()
             .iter()
-            .map(|(pci_id, driver)| (pci_id.clone(), driver.client().clone()))
+            .filter_map(|(pci_id, driver)| {
+                if is_nvme_keepalive_compatible(pci_id) {
+                    Some((pci_id.clone(), driver.client().clone()))
+                } else {
+                    tracing::info!(
+                        %pci_id,
+                        "skipping save of non-ASAP nvme device; \
+                         keepalive disabled for this device"
+                    );
+                    None
+                }
+            })
             .collect();
         for (pci_id, client) in devices_to_save.iter_mut() {
             nvme_disks.push(NvmeSavedDiskConfig {
diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index cc8977de45..9ac604f0a9 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -111,3 +111,20 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync {
         saved_state: Option<&nvme_driver::save_restore::NvmeDriverSavedState>,
     ) -> Result<Box<dyn NvmeDevice>, NvmeSpawnerError>;
 }
+
+/// Returns whether NVMe keepalive (servicing without device reset) is
+/// known to be compatible with the device identified by this VPCI
+/// instance ID.
+///
+/// NVMe keepalive currently has known incompatibilities with NVMe Direct
+/// v2 devices. As a partial mitigation, we only enable keepalive for
+/// ASAP devices, which can be identified by the substring `c05b` in the
+/// VPCI instance GUID. All other devices (NVMe Direct v2) fall back to
+/// the legacy reset-on-servicing path even when the host-level keepalive
+/// flag is enabled.
+///
+/// This heuristic is intentionally fragile and is intended only as a
+/// temporary mitigation while the real compatibility work is in flight.
+pub fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
+    pci_id.to_ascii_lowercase().starts_with("c05b")
+}

From aec5033ad44ed1f6e58b107833c038b0a1894569 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Fri, 1 May 2026 15:54:07 -0700
Subject: [PATCH 02/17] I think this is in better shape

---
 .../src/nvme_manager/manager.rs               | 32 +++++--------------
 .../underhill_core/src/nvme_manager/mod.rs    | 15 ++-------
 2 files changed, 10 insertions(+), 37 deletions(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index d901748574..5ef43eb555 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -302,27 +302,14 @@ impl NvmeManagerWorker {
 
         async {
             join_all(devices_to_shutdown.into_iter().map(|(pci_id, driver)| {
-                // nvme_keepalive is received from host but it is only valid
-                // when memory pool allocator supports save/restore. Further,
-                // as a partial mitigation for known incompatibilities between
-                // keepalive and NVMe Direct v2 devices, we only honor
-                // keepalive for ASAP devices (identified by the VPCI
-                // instance GUID containing `c05b`).
-                let host_requested_keepalive =
-                    nvme_keepalive && self.context.save_restore_supported;
-                let device_keepalive =
-                    host_requested_keepalive && is_nvme_keepalive_compatible(&pci_id);
-                if host_requested_keepalive && !device_keepalive {
-                    tracing::info!(
-                        %pci_id,
-                        "disabling nvme keepalive for non-ASAP device; \
-                         falling back to reset-on-servicing"
-                    );
-                }
                 driver
                     .shutdown(NvmeDriverShutdownOptions {
-                        do_not_reset: device_keepalive,
-                        skip_device_shutdown: device_keepalive,
+                        do_not_reset: nvme_keepalive
+                            && self.context.save_restore_supported
+                            && is_nvme_keepalive_compatible(&pci_id),
+                        skip_device_shutdown: nvme_keepalive
+                            && self.context.save_restore_supported
+                            && !is_nvme_keepalive_compatible(&pci_id),
                     })
                     .instrument(tracing::info_span!("shutdown_nvme_driver", %pci_id))
             }))
@@ -375,7 +362,7 @@ impl NvmeManagerWorker {
                             &context.driver_source,
                             &pci_id,
                             context.vp_count,
-                            context.save_restore_supported,
+                            context.save_restore_supported && is_nvme_keepalive_compatible(&pci_id),
                             None, // No device yet,
                             context.nvme_driver_spawner.clone(),
                         )?;
@@ -436,9 +423,6 @@ impl NvmeManagerWorker {
     pub async fn save(&mut self) -> anyhow::Result<NvmeManagerSavedState> {
         let mut nvme_disks: Vec<NvmeSavedDiskConfig> = Vec::new();
         // Only save state for devices known to be keepalive-compatible.
-        // Non-ASAP (NVMe Direct v2) devices will be reset on servicing, so
-        // persisting their state would be a waste and could conflict with
-        // the reset path on restore.
         let mut devices_to_save: HashMap<String, NvmeDriverManagerClient> = self
             .context
             .devices
@@ -450,7 +434,7 @@ impl NvmeManagerWorker {
                 } else {
                     tracing::info!(
                         %pci_id,
-                        "skipping save of non-ASAP nvme device; \
+                        "skipping save of nvme device; \
                          keepalive disabled for this device"
                     );
                     None
diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index 9ac604f0a9..33b300993d 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -112,19 +112,8 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync {
     ) -> Result<Box<dyn NvmeDevice>, NvmeSpawnerError>;
 }
 
-/// Returns whether NVMe keepalive (servicing without device reset) is
-/// known to be compatible with the device identified by this VPCI
-/// instance ID.
-///
-/// NVMe keepalive currently has known incompatibilities with NVMe Direct
-/// v2 devices. As a partial mitigation, we only enable keepalive for
-/// ASAP devices, which can be identified by the substring `c05b` in the
-/// VPCI instance GUID. All other devices (NVMe Direct v2) fall back to
-/// the legacy reset-on-servicing path even when the host-level keepalive
-/// flag is enabled.
-///
-/// This heuristic is intentionally fragile and is intended only as a
-/// temporary mitigation while the real compatibility work is in flight.
+/// Returns whether the given PCI ID corresponds to an NVMe device that is
+/// compatible with the keepalive.
 pub fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
     pci_id.to_ascii_lowercase().starts_with("c05b")
 }

From 1c8bc557cbc8f1e03b27f64c02c81d866e2bc3a8 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Fri, 1 May 2026 16:05:17 -0700
Subject: [PATCH 03/17] Don't start with keepalive either

---
 openhcl/underhill_core/src/nvme_manager/manager.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index 5ef43eb555..7934dacb64 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -304,6 +304,8 @@ impl NvmeManagerWorker {
             join_all(devices_to_shutdown.into_iter().map(|(pci_id, driver)| {
                 driver
                     .shutdown(NvmeDriverShutdownOptions {
+                        // nvme_keepalive is received from host but it is only valid
+                        // when memory pool allocator supports save/restore.
                         do_not_reset: nvme_keepalive
                             && self.context.save_restore_supported
                             && is_nvme_keepalive_compatible(&pci_id),

From 30b5bad61e623443e5c7733db402d10b049962d0 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Fri, 1 May 2026 16:07:08 -0700
Subject: [PATCH 04/17] Setting up the flags correctly

---
 openhcl/underhill_core/src/nvme_manager/manager.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index 7934dacb64..7b04d9e3bd 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -311,7 +311,7 @@ impl NvmeManagerWorker {
                             && is_nvme_keepalive_compatible(&pci_id),
                         skip_device_shutdown: nvme_keepalive
                             && self.context.save_restore_supported
-                            && !is_nvme_keepalive_compatible(&pci_id),
+                            && is_nvme_keepalive_compatible(&pci_id),
                     })
                     .instrument(tracing::info_span!("shutdown_nvme_driver", %pci_id))
             }))

From 6076ef667d49ebbb9c75f34efa44e12b0dc55ce1 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 11:54:59 -0700
Subject: [PATCH 05/17] Added a test to verify that only ASAP devices are being
 reset

---
 .../tests/multiarch/openhcl_servicing.rs      | 185 ++++++++++++++++++
 1 file changed, 185 insertions(+)

diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index e3c74aa96f..6072e0907c 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -1169,6 +1169,191 @@ async fn servicing_keepalive_slow_create_io_queue(
     Ok(())
 }
 
+/// Verifies the per-device NVMe keepalive gate.
+///
+/// Two NVMe controllers are attached to a single VM:
+/// * An "ASAP" device whose VPCI instance ID has `data2 = 0xc05b`, which
+///   makes its derived pci_id start with `c05b:` (matching the
+///   keepalive-compatible heuristic). Keepalive must be honored for
+///   this device — its controller is not reset across servicing, so
+///   `CREATE_IO_COMPLETION_QUEUE` must NOT be issued after servicing.
+///   The fault panics if the opcode is observed.
+/// * An "NVMe Direct v2" device whose pci_id does not start with
+///   `c05b:`. Keepalive must be downgraded to reset-on-servicing for
+///   this device, so `CREATE_IO_COMPLETION_QUEUE` MUST be issued after
+///   servicing. The fault verifies (via a oneshot) that the opcode is
+///   observed.
+///
+/// Keepalive is enabled VM-wide; the gate is expected to apply per
+/// device based on its pci_id.
+#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
+async fn servicing_keepalive_per_device_gate(
+    config: PetriVmBuilder<OpenVmmPetriBackend>,
+    (igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
+) -> Result<(), anyhow::Error> {
+    let mut flags = config.default_servicing_flags();
+    flags.enable_nvme_keepalive = true;
+
+    // VPCI instance IDs. The pci_id seen by the NVMe driver is derived
+    // as `format!("{:04x}:00:00.0", instance_id.data2)` (see
+    // `vpci_path` in `dispatch/vtl2_settings_worker.rs`), so `data2`
+    // controls the prefix of the pci_id. For ASAP we set data2 = 0xc05b
+    // so its pci_id is `c05b:00:00.0`.
+    const ASAP_NVME_INSTANCE: Guid = guid::guid!("00000000-c05b-0000-0000-000000000001");
+    const ND2_NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
+
+    const ASAP_NSID: u32 = KEEPALIVE_VTL2_NSID;
+    const ND2_NSID: u32 = KEEPALIVE_VTL2_NSID + 1;
+    const ASAP_LUN: u32 = VTL0_NVME_LUN;
+    const ND2_LUN: u32 = VTL0_NVME_LUN + 1;
+
+    // Two independent fault start cells — one per device.
+    let mut asap_fault_updater = CellUpdater::new(false);
+    let mut nd2_fault_updater = CellUpdater::new(false);
+
+    let (nd2_create_seen_send, nd2_create_seen_recv) = mesh::oneshot::<()>();
+
+    // ASAP (c05b) device: keepalive must be honored — fail loudly if any
+    // CREATE_IO_COMPLETION_QUEUE is observed after the fault is armed.
+    let asap_fault_config = FaultConfiguration::new(asap_fault_updater.cell())
+        .with_admin_queue_fault(
+            AdminQueueFaultConfig::new().with_submission_queue_fault(
+                CommandMatchBuilder::new()
+                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
+                    .build(),
+                AdminQueueFaultBehavior::Panic(
+                    "ASAP (c05b) device received CREATE_IO_COMPLETION_QUEUE after servicing — \
+                     keepalive should have been honored for this device but the controller \
+                     was reset."
+                        .to_string(),
+                ),
+            ),
+        );
+
+    // NVMe Direct v2 (non-c05b) device: keepalive must be downgraded —
+    // verify CREATE_IO_COMPLETION_QUEUE IS issued after servicing.
+    let nd2_fault_config = FaultConfiguration::new(nd2_fault_updater.cell())
+        .with_admin_queue_fault(
+            AdminQueueFaultConfig::new().with_submission_queue_fault(
+                CommandMatchBuilder::new()
+                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
+                    .build(),
+                AdminQueueFaultBehavior::Verify(Some(nd2_create_seen_send)),
+            ),
+        );
+
+    let scsi_instance = Guid::new_random();
+
+    let (mut vm, agent) = config
+        .with_vmbus_redirect(true)
+        .with_openhcl_command_line(
+            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+        )
+        .modify_backend(move |b| {
+            b.with_custom_config(move |c| {
+                c.vpci_devices.push(VpciDeviceConfig {
+                    vtl: DeviceVtl::Vtl2,
+                    instance_id: ASAP_NVME_INSTANCE,
+                    resource: NvmeFaultControllerHandle {
+                        subsystem_id: Guid::new_random(),
+                        msix_count: 10,
+                        max_io_queues: 10,
+                        namespaces: vec![NamespaceDefinition {
+                            nsid: ASAP_NSID,
+                            read_only: false,
+                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
+                                len: Some(DEFAULT_DISK_SIZE),
+                                sector_size: None,
+                            })
+                            .into_resource(),
+                        }],
+                        fault_config: asap_fault_config,
+                        enable_tdisp_tests: false,
+                    }
+                    .into_resource(),
+                });
+                c.vpci_devices.push(VpciDeviceConfig {
+                    vtl: DeviceVtl::Vtl2,
+                    instance_id: ND2_NVME_INSTANCE,
+                    resource: NvmeFaultControllerHandle {
+                        subsystem_id: Guid::new_random(),
+                        msix_count: 10,
+                        max_io_queues: 10,
+                        namespaces: vec![NamespaceDefinition {
+                            nsid: ND2_NSID,
+                            read_only: false,
+                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
+                                len: Some(DEFAULT_DISK_SIZE),
+                                sector_size: None,
+                            })
+                            .into_resource(),
+                        }],
+                        fault_config: nd2_fault_config,
+                        enable_tdisp_tests: false,
+                    }
+                    .into_resource(),
+                });
+            })
+        })
+        .add_vtl2_storage_controller(
+            Vtl2StorageControllerBuilder::new(ControllerType::Scsi)
+                .with_instance_id(scsi_instance)
+                .add_lun(
+                    Vtl2LunBuilder::disk()
+                        .with_location(ASAP_LUN)
+                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
+                            ControllerType::Nvme,
+                            ASAP_NVME_INSTANCE,
+                            ASAP_NSID,
+                        )),
+                )
+                .add_lun(
+                    Vtl2LunBuilder::disk()
+                        .with_location(ND2_LUN)
+                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
+                            ControllerType::Nvme,
+                            ND2_NVME_INSTANCE,
+                            ND2_NSID,
+                        )),
+                )
+                .build(),
+        )
+        .run()
+        .await?;
+
+    agent.ping().await?;
+
+    // Arm both faults BEFORE servicing so that the post-servicing
+    // CREATE_IO_COMPLETION_QUEUE is what each fault matches.
+    asap_fault_updater.set(true).await;
+    nd2_fault_updater.set(true).await;
+
+    vm.restart_openhcl(igvm_file.clone(), flags).await?;
+
+    agent.ping().await?;
+
+    // The non-c05b device must issue CREATE_IO_COMPLETION_QUEUE after
+    // servicing because its keepalive was downgraded to reset.
+    CancelContext::new()
+        .with_timeout(Duration::from_secs(60))
+        .until_cancelled(nd2_create_seen_recv)
+        .await
+        .expect(
+            "non-c05b NVMe device did not issue CREATE_IO_COMPLETION_QUEUE within 60s after \
+             servicing — the per-device keepalive gate did not downgrade keepalive for this \
+             device.",
+        )
+        .expect("CREATE_IO_COMPLETION_QUEUE verification on non-c05b device failed");
+
+    // If the c05b device had received CREATE_IO_COMPLETION_QUEUE, the
+    // panic fault would have crashed the VM and failed this test. We
+    // disarm the faults defensively before tearing down.
+    asap_fault_updater.set(false).await;
+    nd2_fault_updater.set(false).await;
+
+    Ok(())
+}
+
 async fn apply_fault_with_keepalive(
     config: PetriVmBuilder<OpenVmmPetriBackend>,
     fault_configuration: FaultConfiguration,

From 1e19679ae91be3471fb92e09a1e0fba7a53b3f7c Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 12:01:57 -0700
Subject: [PATCH 06/17] Update comments

---
 .../vmm_tests/tests/tests/multiarch/openhcl_servicing.rs      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index 6072e0907c..2696f462b3 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -1178,7 +1178,7 @@ async fn servicing_keepalive_slow_create_io_queue(
 ///   this device — its controller is not reset across servicing, so
 ///   `CREATE_IO_COMPLETION_QUEUE` must NOT be issued after servicing.
 ///   The fault panics if the opcode is observed.
-/// * An "NVMe Direct v2" device whose pci_id does not start with
+/// * A device whose pci_id does not start with
 ///   `c05b:`. Keepalive must be downgraded to reset-on-servicing for
 ///   this device, so `CREATE_IO_COMPLETION_QUEUE` MUST be issued after
 ///   servicing. The fault verifies (via a oneshot) that the opcode is
@@ -1230,7 +1230,7 @@ async fn servicing_keepalive_per_device_gate(
             ),
         );
 
-    // NVMe Direct v2 (non-c05b) device: keepalive must be downgraded —
+    // Non c05b device: keepalive must be downgraded —
     // verify CREATE_IO_COMPLETION_QUEUE IS issued after servicing.
     let nd2_fault_config = FaultConfiguration::new(nd2_fault_updater.cell())
         .with_admin_queue_fault(

From fc20c570ad5097794b540506d20400f35b9d7a8d Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 12:17:55 -0700
Subject: [PATCH 07/17] Added comment fix and removed magic numbers out of the
 function definition

---
 openhcl/underhill_core/src/nvme_manager/mod.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index 33b300993d..b0adf280e0 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -67,6 +67,10 @@ pub struct NamespaceError {
     source: NvmeSpawnerError,
 }
 
+/// PCI vendor-ID prefix for NVMe devices that support keepalive, including
+/// the expected `:` separator in the `dddd:bb:dd.f` PCI ID format.
+const KEEPALIVE_COMPATIBLE_PCI_VENDOR_PREFIX: &str = "c05b:";
+
 #[derive(Debug, Error)]
 pub enum NvmeSpawnerError {
     #[error("failed to initialize vfio device")]
@@ -114,6 +118,9 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync {
 
 /// Returns whether the given PCI ID corresponds to an NVMe device that is
 /// compatible with the keepalive.
+/// DEV_NOTE: This is a heuristic based on the PCI vendor ID, which is not ideal but is necessary
 pub fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
-    pci_id.to_ascii_lowercase().starts_with("c05b")
+    pci_id
+        .to_ascii_lowercase()
+        .starts_with(KEEPALIVE_COMPATIBLE_PCI_VENDOR_PREFIX)
 }

From 4c15d5d586a1539036e95e77e7ea6d0faf0fcb43 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 12:32:33 -0700
Subject: [PATCH 08/17] More variable name cleanup

---
 .../underhill_core/src/nvme_manager/mod.rs    |  2 +-
 .../tests/multiarch/openhcl_servicing.rs      | 66 +++++++++----------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index b0adf280e0..d2540d7d76 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -119,7 +119,7 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync {
 /// Returns whether the given PCI ID corresponds to an NVMe device that is
 /// compatible with the keepalive.
 /// DEV_NOTE: This is a heuristic based on the PCI vendor ID, which is not ideal but is necessary
-pub fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
+pub(crate) fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
     pci_id
         .to_ascii_lowercase()
         .starts_with(KEEPALIVE_COMPATIBLE_PCI_VENDOR_PREFIX)
diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index 2696f462b3..3c654d51dc 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -1172,7 +1172,7 @@ async fn servicing_keepalive_slow_create_io_queue(
 /// Verifies the per-device NVMe keepalive gate.
 ///
 /// Two NVMe controllers are attached to a single VM:
-/// * An "ASAP" device whose VPCI instance ID has `data2 = 0xc05b`, which
+/// * A "c05b" device whose VPCI instance ID has `data2 = 0xc05b`, which
 ///   makes its derived pci_id start with `c05b:` (matching the
 ///   keepalive-compatible heuristic). Keepalive must be honored for
 ///   this device — its controller is not reset across servicing, so
@@ -1197,32 +1197,32 @@ async fn servicing_keepalive_per_device_gate(
     // VPCI instance IDs. The pci_id seen by the NVMe driver is derived
     // as `format!("{:04x}:00:00.0", instance_id.data2)` (see
     // `vpci_path` in `dispatch/vtl2_settings_worker.rs`), so `data2`
-    // controls the prefix of the pci_id. For ASAP we set data2 = 0xc05b
+    // controls the prefix of the pci_id. For the device with keepalive we set data2 = 0xc05b
     // so its pci_id is `c05b:00:00.0`.
-    const ASAP_NVME_INSTANCE: Guid = guid::guid!("00000000-c05b-0000-0000-000000000001");
-    const ND2_NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
+    const WITH_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("00000000-c05b-0000-0000-000000000001");
+    const NO_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
 
-    const ASAP_NSID: u32 = KEEPALIVE_VTL2_NSID;
-    const ND2_NSID: u32 = KEEPALIVE_VTL2_NSID + 1;
-    const ASAP_LUN: u32 = VTL0_NVME_LUN;
-    const ND2_LUN: u32 = VTL0_NVME_LUN + 1;
+    const WITH_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID;
+    const NO_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID + 1;
+    const WITH_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN;
+    const NO_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN + 1;
 
     // Two independent fault start cells — one per device.
-    let mut asap_fault_updater = CellUpdater::new(false);
-    let mut nd2_fault_updater = CellUpdater::new(false);
+    let mut with_keepalive_fault_updater = CellUpdater::new(false);
+    let mut no_keepalive_fault_updater = CellUpdater::new(false);
 
-    let (nd2_create_seen_send, nd2_create_seen_recv) = mesh::oneshot::<()>();
+    let (no_keepalive_create_seen_send, no_keepalive_create_seen_recv) = mesh::oneshot::<()>();
 
-    // ASAP (c05b) device: keepalive must be honored — fail loudly if any
+    // c05b device: keepalive must be honored — fail loudly if any
     // CREATE_IO_COMPLETION_QUEUE is observed after the fault is armed.
-    let asap_fault_config = FaultConfiguration::new(asap_fault_updater.cell())
+    let with_keepalive_fault_config = FaultConfiguration::new(with_keepalive_fault_updater.cell())
         .with_admin_queue_fault(
             AdminQueueFaultConfig::new().with_submission_queue_fault(
                 CommandMatchBuilder::new()
                     .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
                     .build(),
                 AdminQueueFaultBehavior::Panic(
-                    "ASAP (c05b) device received CREATE_IO_COMPLETION_QUEUE after servicing — \
+                    "c05b device received CREATE_IO_COMPLETION_QUEUE after servicing — \
                      keepalive should have been honored for this device but the controller \
                      was reset."
                         .to_string(),
@@ -1232,13 +1232,13 @@ async fn servicing_keepalive_per_device_gate(
 
     // Non c05b device: keepalive must be downgraded —
     // verify CREATE_IO_COMPLETION_QUEUE IS issued after servicing.
-    let nd2_fault_config = FaultConfiguration::new(nd2_fault_updater.cell())
+    let no_keepalive_fault_config = FaultConfiguration::new(no_keepalive_fault_updater.cell())
         .with_admin_queue_fault(
             AdminQueueFaultConfig::new().with_submission_queue_fault(
                 CommandMatchBuilder::new()
                     .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
                     .build(),
-                AdminQueueFaultBehavior::Verify(Some(nd2_create_seen_send)),
+                AdminQueueFaultBehavior::Verify(Some(no_keepalive_create_seen_send)),
             ),
         );
 
@@ -1253,13 +1253,13 @@ async fn servicing_keepalive_per_device_gate(
             b.with_custom_config(move |c| {
                 c.vpci_devices.push(VpciDeviceConfig {
                     vtl: DeviceVtl::Vtl2,
-                    instance_id: ASAP_NVME_INSTANCE,
+                    instance_id: WITH_KEEPALIVE_NVME_INSTANCE,
                     resource: NvmeFaultControllerHandle {
                         subsystem_id: Guid::new_random(),
                         msix_count: 10,
                         max_io_queues: 10,
                         namespaces: vec![NamespaceDefinition {
-                            nsid: ASAP_NSID,
+                            nsid: WITH_KEEPALIVE_NSID,
                             read_only: false,
                             disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
                                 len: Some(DEFAULT_DISK_SIZE),
@@ -1267,20 +1267,20 @@ async fn servicing_keepalive_per_device_gate(
                             })
                             .into_resource(),
                         }],
-                        fault_config: asap_fault_config,
+                        fault_config: with_keepalive_fault_config,
                         enable_tdisp_tests: false,
                     }
                     .into_resource(),
                 });
                 c.vpci_devices.push(VpciDeviceConfig {
                     vtl: DeviceVtl::Vtl2,
-                    instance_id: ND2_NVME_INSTANCE,
+                    instance_id: NO_KEEPALIVE_NVME_INSTANCE,
                     resource: NvmeFaultControllerHandle {
                         subsystem_id: Guid::new_random(),
                         msix_count: 10,
                         max_io_queues: 10,
                         namespaces: vec![NamespaceDefinition {
-                            nsid: ND2_NSID,
+                            nsid: NO_KEEPALIVE_NSID,
                             read_only: false,
                             disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
                                 len: Some(DEFAULT_DISK_SIZE),
@@ -1288,7 +1288,7 @@ async fn servicing_keepalive_per_device_gate(
                             })
                             .into_resource(),
                         }],
-                        fault_config: nd2_fault_config,
+                        fault_config: no_keepalive_fault_config,
                         enable_tdisp_tests: false,
                     }
                     .into_resource(),
@@ -1300,20 +1300,20 @@ async fn servicing_keepalive_per_device_gate(
                 .with_instance_id(scsi_instance)
                 .add_lun(
                     Vtl2LunBuilder::disk()
-                        .with_location(ASAP_LUN)
+                        .with_location(WITH_KEEPALIVE_LUN)
                         .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
                             ControllerType::Nvme,
-                            ASAP_NVME_INSTANCE,
-                            ASAP_NSID,
+                            WITH_KEEPALIVE_NVME_INSTANCE,
+                            WITH_KEEPALIVE_NSID,
                         )),
                 )
                 .add_lun(
                     Vtl2LunBuilder::disk()
-                        .with_location(ND2_LUN)
+                        .with_location(NO_KEEPALIVE_LUN)
                         .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
                             ControllerType::Nvme,
-                            ND2_NVME_INSTANCE,
-                            ND2_NSID,
+                            NO_KEEPALIVE_NVME_INSTANCE,
+                            NO_KEEPALIVE_NSID,
                         )),
                 )
                 .build(),
@@ -1325,8 +1325,8 @@ async fn servicing_keepalive_per_device_gate(
 
     // Arm both faults BEFORE servicing so that the post-servicing
     // CREATE_IO_COMPLETION_QUEUE is what each fault matches.
-    asap_fault_updater.set(true).await;
-    nd2_fault_updater.set(true).await;
+    with_keepalive_fault_updater.set(true).await;
+    no_keepalive_fault_updater.set(true).await;
 
     vm.restart_openhcl(igvm_file.clone(), flags).await?;
 
@@ -1336,7 +1336,7 @@ async fn servicing_keepalive_per_device_gate(
     // servicing because its keepalive was downgraded to reset.
     CancelContext::new()
         .with_timeout(Duration::from_secs(60))
-        .until_cancelled(nd2_create_seen_recv)
+        .until_cancelled(no_keepalive_create_seen_recv)
         .await
         .expect(
             "non-c05b NVMe device did not issue CREATE_IO_COMPLETION_QUEUE within 60s after \
@@ -1348,8 +1348,8 @@ async fn servicing_keepalive_per_device_gate(
     // If the c05b device had received CREATE_IO_COMPLETION_QUEUE, the
     // panic fault would have crashed the VM and failed this test. We
     // disarm the faults defensively before tearing down.
-    asap_fault_updater.set(false).await;
-    nd2_fault_updater.set(false).await;
+    with_keepalive_fault_updater.set(false).await;
+    no_keepalive_fault_updater.set(false).await;
 
     Ok(())
 }

From 4c9456c82e152ccfe46312caca93e659d3c30b3d Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 13:22:00 -0700
Subject: [PATCH 09/17] Fixed comments

---
 openhcl/underhill_core/src/nvme_manager/mod.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index d2540d7d76..0d8b2a620c 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -67,9 +67,9 @@ pub struct NamespaceError {
     source: NvmeSpawnerError,
 }
 
-/// PCI vendor-ID prefix for NVMe devices that support keepalive, including
-/// the expected `:` separator in the `dddd:bb:dd.f` PCI ID format.
-const KEEPALIVE_COMPATIBLE_PCI_VENDOR_PREFIX: &str = "c05b:";
+/// Device prefix for nvme devices that are compatibble with keepalive.
+/// the "dddd:" in the `dddd:bb:dd.f` PCI ID format.
+const KEEPALIVE_COMPATIBLE_DEVICE_PREFIX: &str = "c05b:";
 
 #[derive(Debug, Error)]
 pub enum NvmeSpawnerError {
@@ -118,9 +118,9 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync {
 
 /// Returns whether the given PCI ID corresponds to an NVMe device that is
 /// compatible with the keepalive.
-/// DEV_NOTE: This is a heuristic based on the PCI vendor ID, which is not ideal but is necessary
+/// DEV_NOTE: This is a weak heuristic based approach which is not ideal but is necessary
 pub(crate) fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
     pci_id
         .to_ascii_lowercase()
-        .starts_with(KEEPALIVE_COMPATIBLE_PCI_VENDOR_PREFIX)
+        .starts_with(KEEPALIVE_COMPATIBLE_DEVICE_PREFIX)
 }

From dbab2149d7599cd394b3f18c11bdc64beecb83ab Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 13:30:06 -0700
Subject: [PATCH 10/17] Will no longer restore devices that are not keepalive
 comptible. Moreover, it will tear down that device state and start again ...
 at least it should

---
 openhcl/underhill_core/src/nvme_manager/manager.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index 7b04d9e3bd..ee14680a94 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -465,6 +465,9 @@ impl NvmeManagerWorker {
         let mut restored_devices: HashMap<String, NvmeDriverManager> = HashMap::new();
 
         for disk in &saved_state.nvme_disks {
+            // Only restore disks that are known to be keepalive compatible.
+            // This should gracefully tear down state for non-keepalive devices
+            // before using the device.
             let pci_id = disk.pci_id.clone();
             let nvme_driver = self
                 .context
@@ -473,7 +476,7 @@ impl NvmeManagerWorker {
                     &self.context.driver_source,
                     &pci_id,
                     saved_state.cpu_count,
-                    save_restore_supported,
+                    save_restore_supported && is_nvme_keepalive_compatible(&pci_id),
                     Some(&disk.driver_state),
                 )
                 .await?;

From 6412280ee8d66b6b19c3dc53ef346f7ab73adf21 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 16:55:20 -0700
Subject: [PATCH 11/17] Now using vendor and device ID information instead of
 using pci ID to figure out keepalive info

---
 .../underhill_core/src/nvme_manager/device.rs | 10 +++
 .../src/nvme_manager/manager.rs               | 37 +++++++---
 .../underhill_core/src/nvme_manager/mod.rs    | 55 ++++++++++++---
 .../storage/nvme_resources/src/fault.rs       | 68 +++++++++++++++++++
 vm/devices/storage/nvme_test/src/pci.rs       | 15 +++-
 .../tests/multiarch/openhcl_servicing.rs      | 30 ++++----
 6 files changed, 184 insertions(+), 31 deletions(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/device.rs b/openhcl/underhill_core/src/nvme_manager/device.rs
index 3fb4892297..e9bda747b1 100644
--- a/openhcl/underhill_core/src/nvme_manager/device.rs
+++ b/openhcl/underhill_core/src/nvme_manager/device.rs
@@ -334,6 +334,9 @@ enum NvmeDriverRequest {
 pub struct NvmeDriverManager {
     task: Task<()>,
     pci_id: String,
+    /// Whether the underlying device is compatible with NVMe keepalive
+    /// (cached at construction time so callers don't have to re-read sysfs).
+    keepalive_compatible: bool,
     client: NvmeDriverManagerClient,
 }
 
@@ -358,12 +361,18 @@ impl NvmeDriverManager {
         &self.client
     }
 
+    /// Returns whether the underlying device is compatible with NVMe keepalive.
+    pub fn keepalive_compatible(&self) -> bool {
+        self.keepalive_compatible
+    }
+
     /// Creates the [`NvmeDriverManager`].
     pub fn new(
         driver_source: &VmTaskDriverSource,
         pci_id: &str,
         vp_count: u32,
         save_restore_supported: bool,
+        keepalive_compatible: bool,
         device: Option<Box<dyn NvmeDevice>>,
         nvme_driver_spawner: Arc<dyn CreateNvmeDriver>,
     ) -> anyhow::Result<Self> {
@@ -382,6 +391,7 @@ impl NvmeDriverManager {
         Ok(Self {
             task,
             pci_id: pci_id.into(),
+            keepalive_compatible,
             client: NvmeDriverManagerClient {
                 pci_id: pci_id.into(),
                 sender: send,
diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index ee14680a94..13f86430a7 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -302,16 +302,17 @@ impl NvmeManagerWorker {
 
         async {
             join_all(devices_to_shutdown.into_iter().map(|(pci_id, driver)| {
+                let keepalive_compatible = driver.keepalive_compatible();
                 driver
                     .shutdown(NvmeDriverShutdownOptions {
                         // nvme_keepalive is received from host but it is only valid
                         // when memory pool allocator supports save/restore.
                         do_not_reset: nvme_keepalive
                             && self.context.save_restore_supported
-                            && is_nvme_keepalive_compatible(&pci_id),
+                            && keepalive_compatible,
                         skip_device_shutdown: nvme_keepalive
                             && self.context.save_restore_supported
-                            && is_nvme_keepalive_compatible(&pci_id),
+                            && keepalive_compatible,
                     })
                     .instrument(tracing::info_span!("shutdown_nvme_driver", %pci_id))
             }))
@@ -360,11 +361,17 @@ impl NvmeManagerWorker {
                 match guard.entry(pci_id.clone()) {
                     hash_map::Entry::Occupied(_) => unreachable!(), // We checked above that this entry does not exist.
                     hash_map::Entry::Vacant(entry) => {
+                        // Read the PCI vendor/device IDs once and cache the
+                        // resulting keepalive compatibility flag on the
+                        // `NvmeDriverManager` for later save/shutdown queries.
+                        let keepalive_compatible = is_nvme_keepalive_compatible(&pci_id);
+
                         let driver = NvmeDriverManager::new(
                             &context.driver_source,
                             &pci_id,
                             context.vp_count,
-                            context.save_restore_supported && is_nvme_keepalive_compatible(&pci_id),
+                            context.save_restore_supported && keepalive_compatible,
+                            keepalive_compatible,
                             None, // No device yet,
                             context.nvme_driver_spawner.clone(),
                         )?;
@@ -431,7 +438,7 @@ impl NvmeManagerWorker {
             .write()
             .iter()
             .filter_map(|(pci_id, driver)| {
-                if is_nvme_keepalive_compatible(pci_id) {
+                if driver.keepalive_compatible() {
                     Some((pci_id.clone(), driver.client().clone()))
                 } else {
                     tracing::info!(
@@ -469,6 +476,11 @@ impl NvmeManagerWorker {
             // This should gracefully tear down state for non-keepalive devices
             // before using the device.
             let pci_id = disk.pci_id.clone();
+
+            // Read the PCI vendor/device IDs once and cache the resulting
+            // keepalive compatibility flag on the `NvmeDriverManager`.
+            let keepalive_compatible = is_nvme_keepalive_compatible(&pci_id);
+
             let nvme_driver = self
                 .context
                 .nvme_driver_spawner
@@ -476,7 +488,7 @@ impl NvmeManagerWorker {
                     &self.context.driver_source,
                     &pci_id,
                     saved_state.cpu_count,
-                    save_restore_supported && is_nvme_keepalive_compatible(&pci_id),
+                    save_restore_supported && keepalive_compatible,
                     Some(&disk.driver_state),
                 )
                 .await?;
@@ -488,6 +500,7 @@ impl NvmeManagerWorker {
                     &pci_id,
                     self.context.vp_count,
                     true, // save_restore_supported is always `true` when restoring.
+                    keepalive_compatible,
                     Some(nvme_driver),
                     self.context.nvme_driver_spawner.clone(),
                 )?,
@@ -1088,9 +1101,16 @@ mod tests {
         ));
 
         // Create a driver manager
-        let driver_manager =
-            NvmeDriverManager::new(&driver_source, "0000:00:04.0", 4, false, None, spawner)
-                .unwrap();
+        let driver_manager = NvmeDriverManager::new(
+            &driver_source,
+            "0000:00:04.0",
+            4,
+            false,
+            false,
+            None,
+            spawner,
+        )
+        .unwrap();
 
         let client = driver_manager.client().clone();
 
@@ -1174,6 +1194,7 @@ mod tests {
             "0000:00:05.0",
             4,
             true, // save_restore_supported
+            true,
             None,
             spawner,
         )
diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index 0d8b2a620c..09bd259ad2 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -67,9 +67,13 @@ pub struct NamespaceError {
     source: NvmeSpawnerError,
 }
 
-/// Device prefix for nvme devices that are compatibble with keepalive.
-/// the "dddd:" in the `dddd:bb:dd.f` PCI ID format.
-const KEEPALIVE_COMPATIBLE_DEVICE_PREFIX: &str = "c05b:";
+/// PCI vendor ID, as it appears in the sysfs `vendor` file (e.g. `0x0100`),
+/// for NVMe devices that are compatible with keepalive.
+const KEEPALIVE_INCOMPATIBLE_VENDOR_ID: &str = "0x1414";
+
+/// PCI device ID, as it appears in the sysfs `device` file (e.g. `0x0100`),
+/// for NVMe devices that are compatible with keepalive.
+const KEEPALIVE_INCOMPATIBLE_DEVICE_ID: &str = "0xb111";
 
 #[derive(Debug, Error)]
 pub enum NvmeSpawnerError {
@@ -116,11 +120,44 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync {
     ) -> Result<Box<dyn NvmeDevice>, NvmeSpawnerError>;
 }
 
-/// Returns whether the given PCI ID corresponds to an NVMe device that is
-/// compatible with the keepalive.
-/// DEV_NOTE: This is a weak heuristic based approach which is not ideal but is necessary
+/// Returns whether the given PCI device is compatible with NVMe keepalive.
 pub(crate) fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
-    pci_id
-        .to_ascii_lowercase()
-        .starts_with(KEEPALIVE_COMPATIBLE_DEVICE_PREFIX)
+    match read_pci_vendor_device_ids(&pci_id) {
+        Ok((vendor_id, device_id)) => {
+            vendor_id != KEEPALIVE_INCOMPATIBLE_VENDOR_ID
+                || device_id != KEEPALIVE_INCOMPATIBLE_DEVICE_ID
+        }
+        Err(err) => {
+            tracing::warn!(
+                pci_id = %pci_id,
+                error = err.as_ref() as &dyn std::error::Error,
+                "failed to read PCI vendor/device IDs; treating device as not keepalive-compatible"
+            );
+            false
+        }
+    }
+}
+
+/// Reads the sysfs `vendor` and `device` files for the given PCI device,
+/// returning the trimmed contents (e.g. `"0x0100"`).
+///
+/// Callers should invoke this once per device and cache the result, since
+/// the values do not change for the lifetime of the device.
+fn read_pci_vendor_device_ids(pci_id: &str) -> anyhow::Result<(String, String)> {
+    let devpath = std::path::Path::new("/sys/bus/pci/devices").join(pci_id);
+    let vendor = fs_err::read_to_string(devpath.join("vendor"))?
+        .trim_end()
+        .to_owned();
+    let device = fs_err::read_to_string(devpath.join("device"))?
+        .trim_end()
+        .to_owned();
+
+    tracing::info!(
+        pci_id = %pci_id,
+        vendor = %vendor,
+        device = %device,
+        "read PCI vendor/device IDs"
+    );
+
+    Ok((vendor, device))
 }
diff --git a/vm/devices/storage/nvme_resources/src/fault.rs b/vm/devices/storage/nvme_resources/src/fault.rs
index 8fcad86766..8ed05989b6 100644
--- a/vm/devices/storage/nvme_resources/src/fault.rs
+++ b/vm/devices/storage/nvme_resources/src/fault.rs
@@ -315,6 +315,42 @@ pub struct CommandMatch {
     pub mask: [u8; 64],
 }
 
+/// A fault configuration for overriding the device's hardware configuration
+/// (i.e. the values reported in PCI configuration space).
+///
+/// When a field is `Some`, its value replaces the corresponding field reported
+/// by the NVMe fault controller's PCI hardware IDs. When `None`, the real
+/// hardware ID value is used unchanged. Unlike most other faults in this
+/// module, hardware config faults are applied at device construction time
+/// and are therefore not gated by the `fault_active` flag.
+///
+/// # Example
+/// Override the Vendor ID and Device ID reported in PCI config space.
+/// ```no_run
+/// use mesh::CellUpdater;
+/// use nvme_resources::fault::FaultConfiguration;
+/// use nvme_resources::fault::HardwareConfigFaultConfig;
+///
+/// pub fn hardware_config_fault() -> FaultConfiguration {
+///     let mut fault_start_updater = CellUpdater::new(false);
+///     FaultConfiguration::new(fault_start_updater.cell())
+///         .with_hardware_config_fault(
+///             HardwareConfigFaultConfig::new()
+///                 .with_vendor_id(0x1414)
+///                 .with_device_id(0x00a9),
+///         )
+/// }
+/// ```
+#[derive(Debug, Copy, Clone, Default, MeshPayload)]
+pub struct HardwareConfigFaultConfig {
+    /// Override for the device's PCI Vendor ID. When `None`, the real Vendor
+    /// ID is used.
+    pub vendor_id: Option<u16>,
+    /// Override for the device's PCI Device ID. When `None`, the real Device
+    /// ID is used.
+    pub device_id: Option<u16>,
+}
+
 /// Fault configuration for the NVMe fault controller.
 ///
 /// This struct defines behaviors that inject faults into the NVMe fault controller logic,
@@ -390,6 +426,8 @@ pub struct FaultConfiguration {
     pub namespace_fault: NamespaceFaultConfig,
     /// Fault to apply to all IO queues
     pub io_fault: Arc<IoQueueFaultConfig>,
+    /// Fault to apply to the Hardware Configuration
+    pub hardware_config_fault: Option<HardwareConfigFaultConfig>,
 }
 
 impl FaultConfiguration {
@@ -404,6 +442,7 @@ impl FaultConfiguration {
             pci_fault: Some(PciFaultConfig::new()),
             namespace_fault: NamespaceFaultConfig::new(mesh::channel().1),
             io_fault: Arc::new(IoQueueFaultConfig::new(fault_active)),
+            hardware_config_fault: None,
         }
     }
 
@@ -430,6 +469,35 @@ impl FaultConfiguration {
         self.namespace_fault = namespace_fault;
         self
     }
+
+    /// Add a hardware config fault configuration to the fault configuration
+    pub fn with_hardware_config_fault(
+        mut self,
+        hardware_config_fault: HardwareConfigFaultConfig,
+    ) -> Self {
+        self.hardware_config_fault = Some(hardware_config_fault);
+        self
+    }
+}
+
+impl HardwareConfigFaultConfig {
+    /// Create a new no-op hardware config fault configuration. All fields
+    /// default to `None`, meaning the real hardware values are used.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Override the PCI Vendor ID reported by the device.
+    pub fn with_vendor_id(mut self, vendor_id: u16) -> Self {
+        self.vendor_id = Some(vendor_id);
+        self
+    }
+
+    /// Override the PCI Device ID reported by the device.
+    pub fn with_device_id(mut self, device_id: u16) -> Self {
+        self.device_id = Some(device_id);
+        self
+    }
 }
 
 impl PciFaultConfig {
diff --git a/vm/devices/storage/nvme_test/src/pci.rs b/vm/devices/storage/nvme_test/src/pci.rs
index 278793d3e8..396951f19b 100644
--- a/vm/devices/storage/nvme_test/src/pci.rs
+++ b/vm/devices/storage/nvme_test/src/pci.rs
@@ -137,10 +137,21 @@ impl NvmeFaultController {
                 BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
             );
 
+        // Apply any hardware-config fault overrides for the IDs reported in
+        // PCI configuration space, falling back to the real values when no
+        // override is configured.
+        let hardware_config_fault = fault_configuration.hardware_config_fault.take();
+        let vendor_id = hardware_config_fault
+            .and_then(|f| f.vendor_id)
+            .unwrap_or(VENDOR_ID);
+        let device_id = hardware_config_fault
+            .and_then(|f| f.device_id)
+            .unwrap_or(0x00a9);
+
         let cfg_space = ConfigSpaceType0Emulator::new(
             HardwareIds {
-                vendor_id: VENDOR_ID,
-                device_id: 0x00a9,
+                vendor_id,
+                device_id,
                 revision_id: 0,
                 prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
                 sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index 3c654d51dc..c1055f3cc4 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -19,6 +19,7 @@ use nvme_resources::NvmeFaultControllerHandle;
 use nvme_resources::fault::AdminQueueFaultBehavior;
 use nvme_resources::fault::AdminQueueFaultConfig;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::HardwareConfigFaultConfig;
 use nvme_resources::fault::IoQueueFaultBehavior;
 use nvme_resources::fault::IoQueueFaultConfig;
 use nvme_resources::fault::NamespaceChange;
@@ -1240,6 +1241,11 @@ async fn servicing_keepalive_per_device_gate(
                     .build(),
                 AdminQueueFaultBehavior::Verify(Some(no_keepalive_create_seen_send)),
             ),
+        )
+        .with_hardware_config_fault(
+            HardwareConfigFaultConfig::new()
+                .with_vendor_id(0x1414)
+                .with_device_id(0xb111),
         );
 
     let scsi_instance = Guid::new_random();
@@ -1253,13 +1259,13 @@ async fn servicing_keepalive_per_device_gate(
             b.with_custom_config(move |c| {
                 c.vpci_devices.push(VpciDeviceConfig {
                     vtl: DeviceVtl::Vtl2,
-                    instance_id: WITH_KEEPALIVE_NVME_INSTANCE,
+                    instance_id: NO_KEEPALIVE_NVME_INSTANCE,
                     resource: NvmeFaultControllerHandle {
                         subsystem_id: Guid::new_random(),
                         msix_count: 10,
                         max_io_queues: 10,
                         namespaces: vec![NamespaceDefinition {
-                            nsid: WITH_KEEPALIVE_NSID,
+                            nsid: NO_KEEPALIVE_NSID,
                             read_only: false,
                             disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
                                 len: Some(DEFAULT_DISK_SIZE),
@@ -1267,20 +1273,20 @@ async fn servicing_keepalive_per_device_gate(
                             })
                             .into_resource(),
                         }],
-                        fault_config: with_keepalive_fault_config,
+                        fault_config: no_keepalive_fault_config,
                         enable_tdisp_tests: false,
                     }
                     .into_resource(),
                 });
                 c.vpci_devices.push(VpciDeviceConfig {
                     vtl: DeviceVtl::Vtl2,
-                    instance_id: NO_KEEPALIVE_NVME_INSTANCE,
+                    instance_id: WITH_KEEPALIVE_NVME_INSTANCE,
                     resource: NvmeFaultControllerHandle {
                         subsystem_id: Guid::new_random(),
                         msix_count: 10,
                         max_io_queues: 10,
                         namespaces: vec![NamespaceDefinition {
-                            nsid: NO_KEEPALIVE_NSID,
+                            nsid: WITH_KEEPALIVE_NSID,
                             read_only: false,
                             disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
                                 len: Some(DEFAULT_DISK_SIZE),
@@ -1288,7 +1294,7 @@ async fn servicing_keepalive_per_device_gate(
                             })
                             .into_resource(),
                         }],
-                        fault_config: no_keepalive_fault_config,
+                        fault_config: with_keepalive_fault_config,
                         enable_tdisp_tests: false,
                     }
                     .into_resource(),
@@ -1300,20 +1306,20 @@ async fn servicing_keepalive_per_device_gate(
                 .with_instance_id(scsi_instance)
                 .add_lun(
                     Vtl2LunBuilder::disk()
-                        .with_location(WITH_KEEPALIVE_LUN)
+                        .with_location(NO_KEEPALIVE_LUN)
                         .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
                             ControllerType::Nvme,
-                            WITH_KEEPALIVE_NVME_INSTANCE,
-                            WITH_KEEPALIVE_NSID,
+                            NO_KEEPALIVE_NVME_INSTANCE,
+                            NO_KEEPALIVE_NSID,
                         )),
                 )
                 .add_lun(
                     Vtl2LunBuilder::disk()
-                        .with_location(NO_KEEPALIVE_LUN)
+                        .with_location(WITH_KEEPALIVE_LUN)
                         .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
                             ControllerType::Nvme,
-                            NO_KEEPALIVE_NVME_INSTANCE,
-                            NO_KEEPALIVE_NSID,
+                            WITH_KEEPALIVE_NVME_INSTANCE,
+                            WITH_KEEPALIVE_NSID,
                         )),
                 )
                 .build(),

From dea6e9e93ff1320c793f5f90ddcd31f49944b00d Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Mon, 4 May 2026 17:02:50 -0700
Subject: [PATCH 12/17] Minor updates to comments and such

---
 openhcl/underhill_core/src/nvme_manager/device.rs  | 2 --
 openhcl/underhill_core/src/nvme_manager/manager.rs | 6 ------
 openhcl/underhill_core/src/nvme_manager/mod.rs     | 4 ++--
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/device.rs b/openhcl/underhill_core/src/nvme_manager/device.rs
index e9bda747b1..cf57f37a75 100644
--- a/openhcl/underhill_core/src/nvme_manager/device.rs
+++ b/openhcl/underhill_core/src/nvme_manager/device.rs
@@ -334,8 +334,6 @@ enum NvmeDriverRequest {
 pub struct NvmeDriverManager {
     task: Task<()>,
     pci_id: String,
-    /// Whether the underlying device is compatible with NVMe keepalive
-    /// (cached at construction time so callers don't have to re-read sysfs).
     keepalive_compatible: bool,
     client: NvmeDriverManagerClient,
 }
diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index 13f86430a7..910a113f12 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -361,9 +361,6 @@ impl NvmeManagerWorker {
                 match guard.entry(pci_id.clone()) {
                     hash_map::Entry::Occupied(_) => unreachable!(), // We checked above that this entry does not exist.
                     hash_map::Entry::Vacant(entry) => {
-                        // Read the PCI vendor/device IDs once and cache the
-                        // resulting keepalive compatibility flag on the
-                        // `NvmeDriverManager` for later save/shutdown queries.
                         let keepalive_compatible = is_nvme_keepalive_compatible(&pci_id);
 
                         let driver = NvmeDriverManager::new(
@@ -472,9 +469,6 @@ impl NvmeManagerWorker {
         let mut restored_devices: HashMap<String, NvmeDriverManager> = HashMap::new();
 
         for disk in &saved_state.nvme_disks {
-            // Only restore disks that are known to be keepalive compatible.
-            // This should gracefully tear down state for non-keepalive devices
-            // before using the device.
             let pci_id = disk.pci_id.clone();
 
             // Read the PCI vendor/device IDs once and cache the resulting
diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index 09bd259ad2..9244f7ba50 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -68,11 +68,11 @@ pub struct NamespaceError {
 }
 
 /// PCI vendor ID, as it appears in the sysfs `vendor` file (e.g. `0x0100`),
-/// for NVMe devices that are compatible with keepalive.
+/// for NVMe devices that are incompatible with keepalive.
 const KEEPALIVE_INCOMPATIBLE_VENDOR_ID: &str = "0x1414";
 
 /// PCI device ID, as it appears in the sysfs `device` file (e.g. `0x0100`),
-/// for NVMe devices that are compatible with keepalive.
+/// for NVMe devices that are incompatible with keepalive.
 const KEEPALIVE_INCOMPATIBLE_DEVICE_ID: &str = "0xb111";
 
 #[derive(Debug, Error)]

From 7d4c3e236faebb874097529208cb4113649d01d8 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Wed, 6 May 2026 17:37:43 -0700
Subject: [PATCH 13/17] Remove per device gate test

---
 .../tests/multiarch/openhcl_servicing.rs      | 190 ------------------
 1 file changed, 190 deletions(-)

diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index c1055f3cc4..0f2cbf68e2 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -1170,196 +1170,6 @@ async fn servicing_keepalive_slow_create_io_queue(
     Ok(())
 }
 
-/// Verifies the per-device NVMe keepalive gate.
-///
-/// Two NVMe controllers are attached to a single VM:
-/// * A "c05b" device whose VPCI instance ID has `data2 = 0xc05b`, which
-///   makes its derived pci_id start with `c05b:` (matching the
-///   keepalive-compatible heuristic). Keepalive must be honored for
-///   this device — its controller is not reset across servicing, so
-///   `CREATE_IO_COMPLETION_QUEUE` must NOT be issued after servicing.
-///   The fault panics if the opcode is observed.
-/// * A device whose pci_id does not start with
-///   `c05b:`. Keepalive must be downgraded to reset-on-servicing for
-///   this device, so `CREATE_IO_COMPLETION_QUEUE` MUST be issued after
-///   servicing. The fault verifies (via a oneshot) that the opcode is
-///   observed.
-///
-/// Keepalive is enabled VM-wide; the gate is expected to apply per
-/// device based on its pci_id.
-#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
-async fn servicing_keepalive_per_device_gate(
-    config: PetriVmBuilder<OpenVmmPetriBackend>,
-    (igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
-) -> Result<(), anyhow::Error> {
-    let mut flags = config.default_servicing_flags();
-    flags.enable_nvme_keepalive = true;
-
-    // VPCI instance IDs. The pci_id seen by the NVMe driver is derived
-    // as `format!("{:04x}:00:00.0", instance_id.data2)` (see
-    // `vpci_path` in `dispatch/vtl2_settings_worker.rs`), so `data2`
-    // controls the prefix of the pci_id. For the device with keepalive we set data2 = 0xc05b
-    // so its pci_id is `c05b:00:00.0`.
-    const WITH_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("00000000-c05b-0000-0000-000000000001");
-    const NO_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
-
-    const WITH_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID;
-    const NO_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID + 1;
-    const WITH_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN;
-    const NO_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN + 1;
-
-    // Two independent fault start cells — one per device.
-    let mut with_keepalive_fault_updater = CellUpdater::new(false);
-    let mut no_keepalive_fault_updater = CellUpdater::new(false);
-
-    let (no_keepalive_create_seen_send, no_keepalive_create_seen_recv) = mesh::oneshot::<()>();
-
-    // c05b device: keepalive must be honored — fail loudly if any
-    // CREATE_IO_COMPLETION_QUEUE is observed after the fault is armed.
-    let with_keepalive_fault_config = FaultConfiguration::new(with_keepalive_fault_updater.cell())
-        .with_admin_queue_fault(
-            AdminQueueFaultConfig::new().with_submission_queue_fault(
-                CommandMatchBuilder::new()
-                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
-                    .build(),
-                AdminQueueFaultBehavior::Panic(
-                    "c05b device received CREATE_IO_COMPLETION_QUEUE after servicing — \
-                     keepalive should have been honored for this device but the controller \
-                     was reset."
-                        .to_string(),
-                ),
-            ),
-        );
-
-    // Non c05b device: keepalive must be downgraded —
-    // verify CREATE_IO_COMPLETION_QUEUE IS issued after servicing.
-    let no_keepalive_fault_config = FaultConfiguration::new(no_keepalive_fault_updater.cell())
-        .with_admin_queue_fault(
-            AdminQueueFaultConfig::new().with_submission_queue_fault(
-                CommandMatchBuilder::new()
-                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
-                    .build(),
-                AdminQueueFaultBehavior::Verify(Some(no_keepalive_create_seen_send)),
-            ),
-        )
-        .with_hardware_config_fault(
-            HardwareConfigFaultConfig::new()
-                .with_vendor_id(0x1414)
-                .with_device_id(0xb111),
-        );
-
-    let scsi_instance = Guid::new_random();
-
-    let (mut vm, agent) = config
-        .with_vmbus_redirect(true)
-        .with_openhcl_command_line(
-            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-        )
-        .modify_backend(move |b| {
-            b.with_custom_config(move |c| {
-                c.vpci_devices.push(VpciDeviceConfig {
-                    vtl: DeviceVtl::Vtl2,
-                    instance_id: NO_KEEPALIVE_NVME_INSTANCE,
-                    resource: NvmeFaultControllerHandle {
-                        subsystem_id: Guid::new_random(),
-                        msix_count: 10,
-                        max_io_queues: 10,
-                        namespaces: vec![NamespaceDefinition {
-                            nsid: NO_KEEPALIVE_NSID,
-                            read_only: false,
-                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
-                                len: Some(DEFAULT_DISK_SIZE),
-                                sector_size: None,
-                            })
-                            .into_resource(),
-                        }],
-                        fault_config: no_keepalive_fault_config,
-                        enable_tdisp_tests: false,
-                    }
-                    .into_resource(),
-                });
-                c.vpci_devices.push(VpciDeviceConfig {
-                    vtl: DeviceVtl::Vtl2,
-                    instance_id: WITH_KEEPALIVE_NVME_INSTANCE,
-                    resource: NvmeFaultControllerHandle {
-                        subsystem_id: Guid::new_random(),
-                        msix_count: 10,
-                        max_io_queues: 10,
-                        namespaces: vec![NamespaceDefinition {
-                            nsid: WITH_KEEPALIVE_NSID,
-                            read_only: false,
-                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
-                                len: Some(DEFAULT_DISK_SIZE),
-                                sector_size: None,
-                            })
-                            .into_resource(),
-                        }],
-                        fault_config: with_keepalive_fault_config,
-                        enable_tdisp_tests: false,
-                    }
-                    .into_resource(),
-                });
-            })
-        })
-        .add_vtl2_storage_controller(
-            Vtl2StorageControllerBuilder::new(ControllerType::Scsi)
-                .with_instance_id(scsi_instance)
-                .add_lun(
-                    Vtl2LunBuilder::disk()
-                        .with_location(NO_KEEPALIVE_LUN)
-                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
-                            ControllerType::Nvme,
-                            NO_KEEPALIVE_NVME_INSTANCE,
-                            NO_KEEPALIVE_NSID,
-                        )),
-                )
-                .add_lun(
-                    Vtl2LunBuilder::disk()
-                        .with_location(WITH_KEEPALIVE_LUN)
-                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
-                            ControllerType::Nvme,
-                            WITH_KEEPALIVE_NVME_INSTANCE,
-                            WITH_KEEPALIVE_NSID,
-                        )),
-                )
-                .build(),
-        )
-        .run()
-        .await?;
-
-    agent.ping().await?;
-
-    // Arm both faults BEFORE servicing so that the post-servicing
-    // CREATE_IO_COMPLETION_QUEUE is what each fault matches.
-    with_keepalive_fault_updater.set(true).await;
-    no_keepalive_fault_updater.set(true).await;
-
-    vm.restart_openhcl(igvm_file.clone(), flags).await?;
-
-    agent.ping().await?;
-
-    // The non-c05b device must issue CREATE_IO_COMPLETION_QUEUE after
-    // servicing because its keepalive was downgraded to reset.
-    CancelContext::new()
-        .with_timeout(Duration::from_secs(60))
-        .until_cancelled(no_keepalive_create_seen_recv)
-        .await
-        .expect(
-            "non-c05b NVMe device did not issue CREATE_IO_COMPLETION_QUEUE within 60s after \
-             servicing — the per-device keepalive gate did not downgrade keepalive for this \
-             device.",
-        )
-        .expect("CREATE_IO_COMPLETION_QUEUE verification on non-c05b device failed");
-
-    // If the c05b device had received CREATE_IO_COMPLETION_QUEUE, the
-    // panic fault would have crashed the VM and failed this test. We
-    // disarm the faults defensively before tearing down.
-    with_keepalive_fault_updater.set(false).await;
-    no_keepalive_fault_updater.set(false).await;
-
-    Ok(())
-}
-
 async fn apply_fault_with_keepalive(
     config: PetriVmBuilder<OpenVmmPetriBackend>,
     fault_configuration: FaultConfiguration,

From 0af4f8e4da0164a77850d7ec139fc4227375e8bd Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Wed, 6 May 2026 17:39:58 -0700
Subject: [PATCH 14/17] Added test gated to nvme direct 2 devices back in

---
 .../tests/multiarch/openhcl_servicing.rs      | 190 ++++++++++++++++++
 1 file changed, 190 insertions(+)

diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index 0e47989e23..f7ada33715 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -1315,6 +1315,196 @@ async fn servicing_keepalive_slow_create_io_queue_with_inspect(
     Ok(())
 }
 
+/// Verifies the per-device NVMe keepalive gate.
+///
+/// Two NVMe controllers are attached to a single VM:
+/// * A "c05b" device whose VPCI instance ID has `data2 = 0xc05b`, which
+///   makes its derived pci_id start with `c05b:` (matching the
+///   keepalive-compatible heuristic). Keepalive must be honored for
+///   this device — its controller is not reset across servicing, so
+///   `CREATE_IO_COMPLETION_QUEUE` must NOT be issued after servicing.
+///   The fault panics if the opcode is observed.
+/// * A device whose pci_id does not start with
+///   `c05b:`. Keepalive must be downgraded to reset-on-servicing for
+///   this device, so `CREATE_IO_COMPLETION_QUEUE` MUST be issued after
+///   servicing. The fault verifies (via a oneshot) that the opcode is
+///   observed.
+///
+/// Keepalive is enabled VM-wide; the gate is expected to apply per
+/// device based on its pci_id.
+#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
+async fn servicing_keepalive_per_device_gate(
+    config: PetriVmBuilder<OpenVmmPetriBackend>,
+    (igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
+) -> Result<(), anyhow::Error> {
+    let mut flags = config.default_servicing_flags();
+    flags.enable_nvme_keepalive = true;
+
+    // VPCI instance IDs. The pci_id seen by the NVMe driver is derived
+    // as `format!("{:04x}:00:00.0", instance_id.data2)` (see
+    // `vpci_path` in `dispatch/vtl2_settings_worker.rs`), so `data2`
+    // controls the prefix of the pci_id. For the device with keepalive we set data2 = 0xc05b
+    // so its pci_id is `c05b:00:00.0`.
+    const WITH_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("00000000-c05b-0000-0000-000000000001");
+    const NO_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
+
+    const WITH_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID;
+    const NO_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID + 1;
+    const WITH_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN;
+    const NO_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN + 1;
+
+    // Two independent fault start cells — one per device.
+    let mut with_keepalive_fault_updater = CellUpdater::new(false);
+    let mut no_keepalive_fault_updater = CellUpdater::new(false);
+
+    let (no_keepalive_create_seen_send, no_keepalive_create_seen_recv) = mesh::oneshot::<()>();
+
+    // c05b device: keepalive must be honored — fail loudly if any
+    // CREATE_IO_COMPLETION_QUEUE is observed after the fault is armed.
+    let with_keepalive_fault_config = FaultConfiguration::new(with_keepalive_fault_updater.cell())
+        .with_admin_queue_fault(
+            AdminQueueFaultConfig::new().with_submission_queue_fault(
+                CommandMatchBuilder::new()
+                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
+                    .build(),
+                AdminQueueFaultBehavior::Panic(
+                    "c05b device received CREATE_IO_COMPLETION_QUEUE after servicing — \
+                     keepalive should have been honored for this device but the controller \
+                     was reset."
+                        .to_string(),
+                ),
+            ),
+        );
+
+    // Non c05b device: keepalive must be downgraded —
+    // verify CREATE_IO_COMPLETION_QUEUE IS issued after servicing.
+    let no_keepalive_fault_config = FaultConfiguration::new(no_keepalive_fault_updater.cell())
+        .with_admin_queue_fault(
+            AdminQueueFaultConfig::new().with_submission_queue_fault(
+                CommandMatchBuilder::new()
+                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
+                    .build(),
+                AdminQueueFaultBehavior::Verify(Some(no_keepalive_create_seen_send)),
+            ),
+        )
+        .with_hardware_config_fault(
+            HardwareConfigFaultConfig::new()
+                .with_vendor_id(0x1414)
+                .with_device_id(0xb111),
+        );
+
+    let scsi_instance = Guid::new_random();
+
+    let (mut vm, agent) = config
+        .with_vmbus_redirect(true)
+        .with_openhcl_command_line(
+            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+        )
+        .modify_backend(move |b| {
+            b.with_custom_config(move |c| {
+                c.vpci_devices.push(VpciDeviceConfig {
+                    vtl: DeviceVtl::Vtl2,
+                    instance_id: NO_KEEPALIVE_NVME_INSTANCE,
+                    resource: NvmeFaultControllerHandle {
+                        subsystem_id: Guid::new_random(),
+                        msix_count: 10,
+                        max_io_queues: 10,
+                        namespaces: vec![NamespaceDefinition {
+                            nsid: NO_KEEPALIVE_NSID,
+                            read_only: false,
+                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
+                                len: Some(DEFAULT_DISK_SIZE),
+                                sector_size: None,
+                            })
+                            .into_resource(),
+                        }],
+                        fault_config: no_keepalive_fault_config,
+                        enable_tdisp_tests: false,
+                    }
+                    .into_resource(),
+                });
+                c.vpci_devices.push(VpciDeviceConfig {
+                    vtl: DeviceVtl::Vtl2,
+                    instance_id: WITH_KEEPALIVE_NVME_INSTANCE,
+                    resource: NvmeFaultControllerHandle {
+                        subsystem_id: Guid::new_random(),
+                        msix_count: 10,
+                        max_io_queues: 10,
+                        namespaces: vec![NamespaceDefinition {
+                            nsid: WITH_KEEPALIVE_NSID,
+                            read_only: false,
+                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
+                                len: Some(DEFAULT_DISK_SIZE),
+                                sector_size: None,
+                            })
+                            .into_resource(),
+                        }],
+                        fault_config: with_keepalive_fault_config,
+                        enable_tdisp_tests: false,
+                    }
+                    .into_resource(),
+                });
+            })
+        })
+        .add_vtl2_storage_controller(
+            Vtl2StorageControllerBuilder::new(ControllerType::Scsi)
+                .with_instance_id(scsi_instance)
+                .add_lun(
+                    Vtl2LunBuilder::disk()
+                        .with_location(NO_KEEPALIVE_LUN)
+                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
+                            ControllerType::Nvme,
+                            NO_KEEPALIVE_NVME_INSTANCE,
+                            NO_KEEPALIVE_NSID,
+                        )),
+                )
+                .add_lun(
+                    Vtl2LunBuilder::disk()
+                        .with_location(WITH_KEEPALIVE_LUN)
+                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
+                            ControllerType::Nvme,
+                            WITH_KEEPALIVE_NVME_INSTANCE,
+                            WITH_KEEPALIVE_NSID,
+                        )),
+                )
+                .build(),
+        )
+        .run()
+        .await?;
+
+    agent.ping().await?;
+
+    // Arm both faults BEFORE servicing so that the post-servicing
+    // CREATE_IO_COMPLETION_QUEUE is what each fault matches.
+    with_keepalive_fault_updater.set(true).await;
+    no_keepalive_fault_updater.set(true).await;
+
+    vm.restart_openhcl(igvm_file.clone(), flags).await?;
+
+    agent.ping().await?;
+
+    // The non-c05b device must issue CREATE_IO_COMPLETION_QUEUE after
+    // servicing because its keepalive was downgraded to reset.
+    CancelContext::new()
+        .with_timeout(Duration::from_secs(60))
+        .until_cancelled(no_keepalive_create_seen_recv)
+        .await
+        .expect(
+            "non-c05b NVMe device did not issue CREATE_IO_COMPLETION_QUEUE within 60s after \
+             servicing — the per-device keepalive gate did not downgrade keepalive for this \
+             device.",
+        )
+        .expect("CREATE_IO_COMPLETION_QUEUE verification on non-c05b device failed");
+
+    // If the c05b device had received CREATE_IO_COMPLETION_QUEUE, the
+    // panic fault would have crashed the VM and failed this test. We
+    // disarm the faults defensively before tearing down.
+    with_keepalive_fault_updater.set(false).await;
+    no_keepalive_fault_updater.set(false).await;
+
+    Ok(())
+}
+
 async fn apply_fault_with_keepalive(
     config: PetriVmBuilder<OpenVmmPetriBackend>,
     fault_configuration: FaultConfiguration,

From 0a96c9df5d4eefb8a6ce4fcb965f59719b2fd2e0 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Wed, 6 May 2026 17:43:22 -0700
Subject: [PATCH 15/17] Revert "nvme_driver: disable nvme keepalive (#3267)"

This reverts commit e84570e190f22cc8ed0508c7449b346c81da5fd3.
---
 openhcl/openhcl_boot/src/cmdline.rs            |  9 +++------
 .../tests/tests/multiarch/openhcl_servicing.rs | 18 ++++++------------
 .../tests/tests/x86_64/openhcl_uefi.rs         | 10 +++++-----
 3 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/openhcl/openhcl_boot/src/cmdline.rs b/openhcl/openhcl_boot/src/cmdline.rs
index a452b936e1..64239ecbce 100644
--- a/openhcl/openhcl_boot/src/cmdline.rs
+++ b/openhcl/openhcl_boot/src/cmdline.rs
@@ -132,7 +132,7 @@ impl BootCommandLineOptions {
             confidential_debug: false,
             enable_vtl2_gpa_pool: Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release), // use the release config by default
             sidecar: SidecarOptions::default(),
-            disable_nvme_keep_alive: true,
+            disable_nvme_keep_alive: false,
             vtl2_gpa_pool_numa_split: false,
         }
     }
@@ -141,9 +141,6 @@ impl BootCommandLineOptions {
 impl BootCommandLineOptions {
     /// Parse arguments from a command line.
     pub fn parse(&mut self, cmdline: &str) {
-        // Workaround for a host side issue: disable NVMe keepalive by default.
-        self.disable_nvme_keep_alive = true;
-
         let mut override_vtl2_gpa_pool: Option<Vtl2GpaPoolConfig> = None;
         for arg in cmdline.split_whitespace() {
             if arg.starts_with(OPENHCL_CONFIDENTIAL_DEBUG_ENV_VAR_NAME) {
@@ -186,8 +183,8 @@ impl BootCommandLineOptions {
                 }
             } else if arg.starts_with(DISABLE_NVME_KEEP_ALIVE) {
                 let arg = arg.split_once('=').map(|(_, arg)| arg);
-                if arg.is_some_and(|a| a == "0") {
-                    self.disable_nvme_keep_alive = false;
+                if arg.is_some_and(|a| a != "0") {
+                    self.disable_nvme_keep_alive = true;
                 }
             } else if arg.starts_with(VTL2_GPA_POOL_NUMA) {
                 if let Some((_, arg)) = arg.split_once('=') {
diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index f7ada33715..af86e163d3 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -148,9 +148,7 @@ async fn servicing_keepalive_no_device<T: PetriVmmBackend>(
 ) -> anyhow::Result<()> {
     let flags = config.default_servicing_flags();
     openhcl_servicing_core(
-        config.with_openhcl_command_line(
-            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-        ),
+        config.with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512"),
         igvm_file,
         flags,
         DEFAULT_SERVICING_COUNT,
@@ -199,9 +197,7 @@ async fn servicing_keepalive_with_device<T: PetriVmmBackend>(
     let flags = config.default_servicing_flags();
     openhcl_servicing_core(
         config
-            .with_openhcl_command_line(
-                "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-            )
+            .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512")
             .with_boot_device_type(petri::BootDeviceType::ScsiViaNvme)
             .with_vmbus_redirect(true), // Need this to attach the NVMe device
         igvm_file,
@@ -1555,9 +1551,7 @@ async fn create_keepalive_test_config_default(
 
     config
         .with_vmbus_redirect(true)
-        .with_openhcl_command_line(
-            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-        )
+        .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512")
         .modify_backend(move |b| {
             b.with_custom_config(|c| {
                 // Add a fault controller to test the nvme controller functionality
@@ -1645,9 +1639,9 @@ async fn create_keepalive_test_config_custom(
 ) -> Result<(PetriVm<OpenVmmPetriBackend>, PipetteClient), anyhow::Error> {
     const NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
 
-    let mut builder = config.with_vmbus_redirect(true).with_openhcl_command_line(
-        "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-    );
+    let mut builder = config
+        .with_vmbus_redirect(true)
+        .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512");
 
     for cmdline in extra_cmdlines {
         builder = builder.with_openhcl_command_line(cmdline);
diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
index b43f0ec855..593e78dcc6 100644
--- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
+++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
@@ -193,7 +193,7 @@ async fn nvme_relay_explicit_private_pool(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "OPENHCL_ENABLE_VTL2_GPA_POOL=512",
             expected_props: Some(ExpectedNvmeDeviceProperties {
                 save_restore_supported: true,
                 qsize: 256, // private pool should allow contiguous allocations.
@@ -218,7 +218,7 @@ async fn nvme_relay_heuristic_debug_16vp_768mb_heavy(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "",
             processor_topology: Some(ProcessorTopology::heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(768 * 1024 * 1024),
@@ -246,7 +246,7 @@ async fn nvme_relay_heuristic_release_16vp_256mb_heavy(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "",
             processor_topology: Some(ProcessorTopology::heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(256 * 1024 * 1024),
@@ -277,7 +277,7 @@ async fn nvme_relay_heuristic_release_32vp_500mb_very_heavy(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "",
             processor_topology: Some(ProcessorTopology::very_heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(500 * 1024 * 1024),
@@ -307,7 +307,7 @@ async fn nvme_relay_32vp_768mb_very_heavy(
             ..Default::default()
         }),
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0 OPENHCL_ENABLE_VTL2_GPA_POOL=10240",
+            openhcl_cmdline: "OPENHCL_ENABLE_VTL2_GPA_POOL=10240",
             processor_topology: Some(ProcessorTopology::very_heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(768 * 1024 * 1024),

From 4efda83fd3f175c3e606a49f635f61ff793d2c77 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Wed, 6 May 2026 17:45:36 -0700
Subject: [PATCH 16/17] For a local build

---
 openhcl/underhill_core/src/nvme_manager/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index 9244f7ba50..f784e179a9 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -73,7 +73,7 @@ const KEEPALIVE_INCOMPATIBLE_VENDOR_ID: &str = "0x1414";
 
 /// PCI device ID, as it appears in the sysfs `device` file (e.g. `0x0100`),
 /// for NVMe devices that are incompatible with keepalive.
-const KEEPALIVE_INCOMPATIBLE_DEVICE_ID: &str = "0xb111";
+const KEEPALIVE_INCOMPATIBLE_DEVICE_ID: &str = "0x0000";
 
 #[derive(Debug, Error)]
 pub enum NvmeSpawnerError {

From 88aed69b618f45164f6bbd06a17254c4ff4cf0f4 Mon Sep 17 00:00:00 2001
From: Guramrit Singh <gurasingh@microsoft.com>
Date: Wed, 6 May 2026 17:57:59 -0700
Subject: [PATCH 17/17] Revert "For a local build"

This reverts commit 4efda83fd3f175c3e606a49f635f61ff793d2c77.
---
 openhcl/underhill_core/src/nvme_manager/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index f784e179a9..9244f7ba50 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -73,7 +73,7 @@ const KEEPALIVE_INCOMPATIBLE_VENDOR_ID: &str = "0x1414";
 
 /// PCI device ID, as it appears in the sysfs `device` file (e.g. `0x0100`),
 /// for NVMe devices that are incompatible with keepalive.
-const KEEPALIVE_INCOMPATIBLE_DEVICE_ID: &str = "0x0000";
+const KEEPALIVE_INCOMPATIBLE_DEVICE_ID: &str = "0xb111";
 
 #[derive(Debug, Error)]
 pub enum NvmeSpawnerError {