diff --git a/openhcl/openhcl_boot/src/cmdline.rs b/openhcl/openhcl_boot/src/cmdline.rs
index a452b936e1..64239ecbce 100644
--- a/openhcl/openhcl_boot/src/cmdline.rs
+++ b/openhcl/openhcl_boot/src/cmdline.rs
@@ -132,7 +132,7 @@ impl BootCommandLineOptions {
             confidential_debug: false,
             enable_vtl2_gpa_pool: Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release), // use the release config by default
             sidecar: SidecarOptions::default(),
-            disable_nvme_keep_alive: true,
+            disable_nvme_keep_alive: false,
             vtl2_gpa_pool_numa_split: false,
         }
     }
@@ -141,9 +141,6 @@ impl BootCommandLineOptions {
     /// Parse arguments from a command line.
     pub fn parse(&mut self, cmdline: &str) {
-        // Workaround for a host side issue: disable NVMe keepalive by default.
-        self.disable_nvme_keep_alive = true;
-
         let mut override_vtl2_gpa_pool: Option = None;
         for arg in cmdline.split_whitespace() {
             if arg.starts_with(OPENHCL_CONFIDENTIAL_DEBUG_ENV_VAR_NAME) {
@@ -186,8 +183,8 @@ impl BootCommandLineOptions {
                 }
             } else if arg.starts_with(DISABLE_NVME_KEEP_ALIVE) {
                 let arg = arg.split_once('=').map(|(_, arg)| arg);
-                if arg.is_some_and(|a| a == "0") {
-                    self.disable_nvme_keep_alive = false;
+                if arg.is_some_and(|a| a != "0") {
+                    self.disable_nvme_keep_alive = true;
                 }
             } else if arg.starts_with(VTL2_GPA_POOL_NUMA) {
                 if let Some((_, arg)) = arg.split_once('=') {
diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs
index 511639253f..0ac9610f83 100644
--- a/openhcl/underhill_core/src/dispatch/mod.rs
+++ b/openhcl/underhill_core/src/dispatch/mod.rs
@@ -444,16 +444,17 @@ impl LoadedVm {
             capabilities_flags,
         } = message;
 
-        // If the host provided timeout hint is >= uint16::max
-        // seconds, we treat that as a signal from the host that no
+        // If the host provided timeout hint is >= 59 seconds,
+        // we treat that as a signal from the host that no
         // timeout duration was set. We instead limit servicing to
-        // 200s in that case.
-        let timeout_hint = if timeout_hint >= Duration::from_secs(u16::MAX as u64) {
+        // 59s in that case.
+        let timeout_hint = if timeout_hint >= Duration::from_secs(59) {
             tracing::info!(
                 CVM_ALLOWED,
-                "host provided UINT16_MAX timeout hint, defaulting to 200s"
+                host_timeout_hint_ms = timeout_hint.as_millis() as u64,
+                "host provided timeout hint >= 59s, defaulting to 59s"
             );
-            Duration::from_secs(200)
+            Duration::from_secs(59)
         } else {
             timeout_hint
         };
diff --git a/openhcl/underhill_core/src/nvme_manager/device.rs b/openhcl/underhill_core/src/nvme_manager/device.rs
index 3fb4892297..fd80795268 100644
--- a/openhcl/underhill_core/src/nvme_manager/device.rs
+++ b/openhcl/underhill_core/src/nvme_manager/device.rs
@@ -334,6 +334,7 @@ enum NvmeDriverRequest {
 pub struct NvmeDriverManager {
     task: Task<()>,
     pci_id: String,
+    keepalive_compatible: bool,
     client: NvmeDriverManagerClient,
 }
 
@@ -358,12 +359,18 @@ impl NvmeDriverManager {
         &self.client
     }
 
+    /// Returns whether the underlying device is compatible with NVMe keepalive.
+    pub fn keepalive_compatible(&self) -> bool {
+        self.keepalive_compatible
+    }
+
     /// Creates the [`NvmeDriverManager`].
    pub fn new(
        driver_source: &VmTaskDriverSource,
        pci_id: &str,
        vp_count: u32,
        save_restore_supported: bool,
+        keepalive_compatible: bool,
        device: Option>,
        nvme_driver_spawner: Arc<dyn CreateNvmeDriver>,
    ) -> anyhow::Result<Self> {
@@ -382,6 +389,7 @@ impl NvmeDriverManager {
         Ok(Self {
             task,
             pci_id: pci_id.into(),
+            keepalive_compatible,
             client: NvmeDriverManagerClient {
                 pci_id: pci_id.into(),
                 sender: send,
@@ -557,6 +565,7 @@ impl NvmeDriverManagerWorker {
             }
             NvmeDriverRequest::Shutdown(rpc) => {
                 rpc.handle(async |(_span, options)| {
+                    tracing::info!(pci_id = %self.pci_id, do_not_reset = %options.do_not_reset, skip_device_shutdown = %options.skip_device_shutdown, "nvme device manager worker shutdown called");
                     // Driver may be `None` here if there was a failure during driver creation.
                     // In that case, we just skip the shutdown rather than panic.
                     match self.driver.take() {
diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs
index 1b59109fec..910a113f12 100644
--- a/openhcl/underhill_core/src/nvme_manager/manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager/manager.rs
@@ -5,6 +5,7 @@ use crate::nvme_manager::CreateNvmeDriver;
 use crate::nvme_manager::device::NvmeDriverManager;
 use crate::nvme_manager::device::NvmeDriverManagerClient;
 use crate::nvme_manager::device::NvmeDriverShutdownOptions;
+use crate::nvme_manager::is_nvme_keepalive_compatible;
 use crate::nvme_manager::save_restore::NvmeManagerSavedState;
 use crate::nvme_manager::save_restore::NvmeSavedDiskConfig;
 use crate::servicing::NvmeSavedState;
@@ -301,12 +302,17 @@ impl NvmeManagerWorker {
             async {
                 join_all(devices_to_shutdown.into_iter().map(|(pci_id, driver)| {
+                    let keepalive_compatible = driver.keepalive_compatible();
                     driver
                         .shutdown(NvmeDriverShutdownOptions {
                             // nvme_keepalive is received from host but it is only valid
                             // when memory pool allocator supports save/restore.
-                            do_not_reset: nvme_keepalive && self.context.save_restore_supported,
-                            skip_device_shutdown: nvme_keepalive && self.context.save_restore_supported,
+                            do_not_reset: nvme_keepalive
+                                && self.context.save_restore_supported
+                                && keepalive_compatible,
+                            skip_device_shutdown: nvme_keepalive
+                                && self.context.save_restore_supported
+                                && keepalive_compatible,
                         })
                         .instrument(tracing::info_span!("shutdown_nvme_driver", %pci_id))
                 }))
@@ -355,11 +361,14 @@ impl NvmeManagerWorker {
         match guard.entry(pci_id.clone()) {
             hash_map::Entry::Occupied(_) => unreachable!(), // We checked above that this entry does not exist.
             hash_map::Entry::Vacant(entry) => {
+                let keepalive_compatible = is_nvme_keepalive_compatible(&pci_id);
+
                 let driver = NvmeDriverManager::new(
                     &context.driver_source,
                     &pci_id,
                     context.vp_count,
-                    context.save_restore_supported,
+                    context.save_restore_supported && keepalive_compatible,
+                    keepalive_compatible,
                     None, // No device yet,
                     context.nvme_driver_spawner.clone(),
                 )?;
@@ -419,12 +428,24 @@ impl NvmeManagerWorker {
     /// Saves NVMe device's states into buffer during servicing.
     pub async fn save(&mut self) -> anyhow::Result<NvmeManagerSavedState> {
         let mut nvme_disks: Vec<NvmeSavedDiskConfig> = Vec::new();
+        // Only save state for devices known to be keepalive-compatible.
        let mut devices_to_save: HashMap<String, NvmeDriverManagerClient> = self
            .context
            .devices
            .write()
            .iter()
-            .map(|(pci_id, driver)| (pci_id.clone(), driver.client().clone()))
+            .filter_map(|(pci_id, driver)| {
+                if driver.keepalive_compatible() {
+                    Some((pci_id.clone(), driver.client().clone()))
+                } else {
+                    tracing::info!(
+                        %pci_id,
+                        "skipping save of nvme device; \
+                        keepalive disabled for this device"
+                    );
+                    None
+                }
+            })
            .collect();
        for (pci_id, client) in devices_to_save.iter_mut() {
            nvme_disks.push(NvmeSavedDiskConfig {
@@ -449,6 +470,11 @@
         for disk in &saved_state.nvme_disks {
             let pci_id = disk.pci_id.clone();
+
+            // Read the PCI vendor/device IDs once and cache the resulting
+            // keepalive compatibility flag on the `NvmeDriverManager`.
+            let keepalive_compatible = is_nvme_keepalive_compatible(&pci_id);
+
             let nvme_driver = self
                 .context
                 .nvme_driver_spawner
@@ -456,7 +482,7 @@
                     &self.context.driver_source,
                     &pci_id,
                     saved_state.cpu_count,
-                    save_restore_supported,
+                    save_restore_supported && keepalive_compatible,
                     Some(&disk.driver_state),
                 )
                 .await?;
@@ -468,6 +494,7 @@
                     &pci_id,
                     self.context.vp_count,
                     true, // save_restore_supported is always `true` when restoring.
+                    keepalive_compatible,
                     Some(nvme_driver),
                     self.context.nvme_driver_spawner.clone(),
                 )?,
@@ -1068,9 +1095,16 @@ mod tests {
         ));
 
         // Create a driver manager
-        let driver_manager =
-            NvmeDriverManager::new(&driver_source, "0000:00:04.0", 4, false, None, spawner)
-                .unwrap();
+        let driver_manager = NvmeDriverManager::new(
+            &driver_source,
+            "0000:00:04.0",
+            4,
+            false,
+            false,
+            None,
+            spawner,
+        )
+        .unwrap();
 
         let client = driver_manager.client().clone();
 
@@ -1154,6 +1188,7 @@ mod tests {
             "0000:00:05.0",
             4,
             true, // save_restore_supported
+            true,
             None,
             spawner,
         )
diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs
index cc8977de45..9244f7ba50 100644
--- a/openhcl/underhill_core/src/nvme_manager/mod.rs
+++ b/openhcl/underhill_core/src/nvme_manager/mod.rs
@@ -67,6 +67,14 @@ pub struct NamespaceError {
     source: NvmeSpawnerError,
 }
 
+/// PCI vendor ID, as it appears in the sysfs `vendor` file (e.g. `0x0100`),
+/// for NVMe devices that are incompatible with keepalive.
+const KEEPALIVE_INCOMPATIBLE_VENDOR_ID: &str = "0x1414";
+
+/// PCI device ID, as it appears in the sysfs `device` file (e.g. `0x0100`),
+/// for NVMe devices that are incompatible with keepalive.
+const KEEPALIVE_INCOMPATIBLE_DEVICE_ID: &str = "0xb111";
+
 #[derive(Debug, Error)]
 pub enum NvmeSpawnerError {
     #[error("failed to initialize vfio device")]
@@ -111,3 +119,45 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync {
         saved_state: Option<&nvme_driver::save_restore::NvmeDriverSavedState>,
     ) -> Result, NvmeSpawnerError>;
 }
+
+/// Returns whether the given PCI device is compatible with NVMe keepalive.
+pub(crate) fn is_nvme_keepalive_compatible(pci_id: &str) -> bool {
+    match read_pci_vendor_device_ids(&pci_id) {
+        Ok((vendor_id, device_id)) => {
+            vendor_id != KEEPALIVE_INCOMPATIBLE_VENDOR_ID
+                || device_id != KEEPALIVE_INCOMPATIBLE_DEVICE_ID
+        }
+        Err(err) => {
+            tracing::warn!(
+                pci_id = %pci_id,
+                error = err.as_ref() as &dyn std::error::Error,
+                "failed to read PCI vendor/device IDs; treating device as not keepalive-compatible"
+            );
+            false
+        }
+    }
+}
+
+/// Reads the sysfs `vendor` and `device` files for the given PCI device,
+/// returning the trimmed contents (e.g. `"0x0100"`).
+///
+/// Callers should invoke this once per device and cache the result, since
+/// the values do not change for the lifetime of the device.
+fn read_pci_vendor_device_ids(pci_id: &str) -> anyhow::Result<(String, String)> {
+    let devpath = std::path::Path::new("/sys/bus/pci/devices").join(pci_id);
+    let vendor = fs_err::read_to_string(devpath.join("vendor"))?
+        .trim_end()
+        .to_owned();
+    let device = fs_err::read_to_string(devpath.join("device"))?
+        .trim_end()
+        .to_owned();
+
+    tracing::info!(
+        pci_id = %pci_id,
+        vendor = %vendor,
+        device = %device,
+        "read PCI vendor/device IDs"
+    );
+
+    Ok((vendor, device))
+}
diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
index 25bb4472df..cc5c2e2916 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
@@ -831,6 +831,7 @@ impl NvmeDriver {
                 tracing::info!(
                     id = a.qid,
                     pending_commands_count = a.handler_data.pending_cmds.commands.len(),
+                    base_pfn = a.base_pfn,
                     ?pci_id,
                     "restoring admin queue",
                 );
@@ -914,9 +915,10 @@ impl NvmeDriver {
                 .io
                 .iter()
                 .map(|io_state| format!(
-                    "{{qid={}, pending_commands_count={}}}",
+                    "{{qid={}, pending_commands_count={}, base_pfn={}}}",
                     io_state.queue_data.qid,
-                    io_state.queue_data.handler_data.pending_cmds.commands.len()
+                    io_state.queue_data.handler_data.pending_cmds.commands.len(),
+                    io_state.queue_data.base_pfn,
                 ))
                 .collect::<Vec<_>>()
                 .join(", "),
@@ -1270,7 +1272,7 @@ async fn handle_asynchronous_events(
 impl Drop for NvmeDriver {
     fn drop(&mut self) {
-        tracing::trace!(pci_id = ?self.device_id, ka = self.nvme_keepalive, task = self.task.is_some(), "dropping nvme driver");
+        tracing::info!(pci_id = ?self.device_id, ka = self.nvme_keepalive, task = self.task.is_some(), "dropping nvme driver");
         if self.task.is_some() {
             // Do not reset NVMe device when nvme_keepalive is requested.
            tracing::debug!(nvme_keepalive = self.nvme_keepalive, pci_id = ?self.device_id, "dropping nvme driver");
@@ -1308,32 +1310,29 @@ impl AsyncRun for DriverWorkerTask {
         stop: &mut task_control::StopTask<'_>,
         state: &mut WorkerState,
     ) -> Result<(), task_control::Cancelled> {
-        let r = stop
-            .until_stopped(async {
-                loop {
-                    match self.recv.next().await {
-                        Some(NvmeWorkerRequest::CreateIssuer(rpc)) => {
-                            rpc.handle(async |cpu| self.create_io_issuer(state, cpu).await)
-                                .await
-                        }
-                        Some(NvmeWorkerRequest::Save(rpc)) => {
-                            rpc.handle(async |span| {
-                                let child_span = tracing::info_span!(
-                                    parent: &span,
-                                    "nvme_worker_save",
-                                    pci_id = %self.device.id()
-                                );
-                                self.save(state).instrument(child_span).await
-                            })
-                            .await
-                        }
-                        None => break,
-                    }
-                }
-            })
-            .await;
-        tracing::info!(pci_id = %self.device.id(), "nvme worker task exiting");
-        r
+        loop {
+            let cmd = stop.until_stopped(self.recv.next()).await?;
+            match cmd {
+                Some(NvmeWorkerRequest::CreateIssuer(rpc)) => {
+                    rpc.handle(async |cpu| self.create_io_issuer(state, cpu).await)
+                        .await
+                }
+                Some(NvmeWorkerRequest::Save(rpc)) => {
+                    rpc.handle(async |span| {
+                        let child_span = tracing::info_span!(
+                            parent: &span,
+                            "nvme_worker_save",
+                            pci_id = %self.device.id()
+                        );
+                        self.save(state).instrument(child_span).await
+                    })
+                    .await
+                }
+                None => break,
+            }
+        }
+        tracing::info!(pci_id = %self.device.id(), "nvme worker task exiting cleanly");
+        Ok(())
     }
 }
 
@@ -1398,7 +1397,7 @@ impl DriverWorkerTask {
     }
 
     async fn create_io_issuer(&mut self, state: &mut WorkerState, cpu: u32) {
-        tracing::debug!(cpu, pci_id = ?self.device.id(), "issuer request");
+        tracing::info!(cpu, pci_id = ?self.device.id(), "create io issuer request");
         if self.io_issuers.per_cpu[cpu as usize].get().is_some() {
             return;
         }
@@ -1496,7 +1495,7 @@
         let iv = qid - 1;
         self.next_ioq_id += 1;
 
-        tracing::debug!(cpu, qid, iv, pci_id = ?self.device.id(), "creating io queue");
+        tracing::info!(cpu, qid, iv, pci_id = ?self.device.id(), "create_nvme_io_queue_fn: creating io queue");
 
         let interrupt = self
             .device
@@ -1516,7 +1515,7 @@
                 qid,
                 cpu,
                 pci_id = ?self.device.id(),
-                "created io queue in SelfDrained state"
+                "create_nvme_io_queue_fn: created io queue in SelfDrained state"
             );
         }
 
@@ -1533,6 +1532,7 @@
             drain_after_restore,
         )
         .map_err(|err| DeviceError::IoQueuePairCreationFailure(err, qid))?;
+        tracing::info!(cpu, qid, pci_id = ?self.device.id(), "create_nvme_io_queue_fn: created the internal queue pair object.");
 
         assert_eq!(queue.sq_entries(), queue.cq_entries());
         state.qsize = queue.sq_entries();
@@ -1599,6 +1599,7 @@
         };
 
         if let Err(err) = r.await {
+            tracing::info!(cpu, qid, pci_id = ?self.device.id(), created_completion_queue, error = ?err, "create_nvme_io_queue_fn: failed to create IO queue, tearing down and deleting internal resources.");
             if created_completion_queue {
                 if let Err(err) = admin
                     .issue_raw(spec::Command {
@@ -1619,6 +1620,7 @@
             return Err(DeviceError::Other(err));
         }
 
+        tracing::info!(cpu, qid, pci_id = ?self.device.id(), "create_nvme_io_queue_fn: successfully created io queue.");
         Ok(IoIssuer {
             issuer: io_queue.queue.issuer().clone(),
             cpu,
@@ -1703,6 +1705,7 @@
                 pci_id = ?self.device.id(),
                 id = admin_state.qid,
                 pending_commands_count = admin_state.handler_data.pending_cmds.commands.len(),
+                pfn = admin_state.base_pfn,
                 "saved admin queue",
             );
             Some(admin_state)
@@ -1758,9 +1761,10 @@
             state = io
                 .iter()
                 .map(|io_state| format!(
-                    "{{qid={}, pending_commands_count={}}}",
+                    "{{qid={}, pending_commands_count={}, base_pfn={}}}",
                     io_state.queue_data.qid,
-                    io_state.queue_data.handler_data.pending_cmds.commands.len()
+                    io_state.queue_data.handler_data.pending_cmds.commands.len(),
+                    io_state.queue_data.base_pfn
                 ))
                 .collect::<Vec<_>>()
                 .join(", "),
diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs
index 73fc432a53..5b6eb831b2 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs
@@ -93,11 +93,12 @@ impl PendingCommands {
     const MAX_CIDS: usize = 1 << Self::CID_KEY_BITS;
     const CID_SEQ_OFFSET: Wrapping<u16> = Wrapping(1 << Self::CID_KEY_BITS);
 
-    fn new(qid: u16) -> Self {
+    fn new(qid: u16, device_id: String) -> Self {
         Self {
             commands: Slab::new(),
             next_cid_high_bits: Wrapping(0),
             qid,
+            device_id,
         }
     }
 
@@ -124,7 +125,19 @@
         entry.insert(PendingCommand {
             command: *command,
             respond,
-            submitted_at: (self.qid == 0).then(Instant::now),
+            submitted_at: (self.qid == 0).then(|| {
+                tracing::info!(
+                    pci_id = %self.device_id,
+                    ?cid,
+                    opcode = ?command.cdw0.opcode(),
+                    opname = ?spec::AdminOpcode(command.cdw0.opcode()),
+                    nsid = ?command.nsid,
+                    cdw10 = command.cdw10,
+                    cdw11 = command.cdw11,
+                    "inserted admin command",
+                );
+                Instant::now()
+            }),
         });
     }
 
@@ -140,6 +153,19 @@
             self.qid,
             command.command.cdw0.opcode(),
         );
+        if let Some(submitted_at) = command.submitted_at {
+            tracing::info!(
+                pci_id = %self.device_id,
+                ?cid,
+                opcode = ?command.command.cdw0.opcode(),
+                opname = ?spec::AdminOpcode(command.command.cdw0.opcode()),
+                nsid = ?command.command.nsid,
+                cdw10 = command.command.cdw10,
+                cdw11 = command.command.cdw11,
+                elapsed = ?submitted_at.elapsed(),
+                "completed admin command",
+            );
+        }
         command.respond
     }
 
@@ -161,7 +187,11 @@
     }
 
     /// Restore pending commands from the saved state.
-    pub fn restore(saved_state: &PendingCommandsSavedState, qid: u16) -> anyhow::Result<Self> {
+    pub fn restore(
+        saved_state: &PendingCommandsSavedState,
+        qid: u16,
+        device_id: String,
+    ) -> anyhow::Result<Self> {
         let PendingCommandsSavedState {
             commands,
             next_cid_high_bits,
@@ -188,6 +218,7 @@
                 .collect::>(),
             next_cid_high_bits: Wrapping(*next_cid_high_bits),
             qid,
+            device_id,
         })
     }
 }
@@ -508,7 +539,7 @@ impl QueuePair {
             QueueHandler {
                 sq: SubmissionQueue::new(qid, sq_entries, sq_mem_block),
                 cq: CompletionQueue::new(qid, cq_entries, cq_mem_block),
-                commands: PendingCommands::new(qid),
+                commands: PendingCommands::new(qid, device_id.into()),
                 stats: Default::default(),
                 drain_after_restore,
                 aer_handler,
@@ -947,6 +978,7 @@ struct PendingCommands {
     #[inspect(hex)]
     next_cid_high_bits: Wrapping<u16>,
     qid: u16,
+    device_id: String,
 }
 
 #[derive(Inspect)]
@@ -1360,7 +1392,7 @@ impl QueueHandler {
         Ok(Self {
             sq: SubmissionQueue::restore(sq_mem_block, sq_state)?,
             cq: CompletionQueue::restore(cq_mem_block, cq_state)?,
-            commands: PendingCommands::restore(pending_cmds, sq_state.sqid)?,
+            commands: PendingCommands::restore(pending_cmds, sq_state.sqid, device_id.into())?,
             stats: Default::default(),
             // Only drain pending commands for I/O queues.
             // Admin queue is expected to have pending Async Event requests.
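Aside on the `PendingCommands` bookkeeping that the queue_pair.rs hunks above thread `device_id` through: the constants `MAX_CIDS = 1 << CID_KEY_BITS` and `CID_SEQ_OFFSET = Wrapping(1 << CID_KEY_BITS)`, together with the `next_cid_high_bits` counter, imply that each NVMe command identifier packs a slab key into its low bits and a wrapping sequence counter into its high bits, so a stale completion cannot be confused with a recycled slot. Below is a minimal free-standing sketch of that packing; the `CID_KEY_BITS` value and the helper names (`pack_cid`, `slab_key_of`) are assumptions for illustration, not the driver's actual code.

```rust
use std::num::Wrapping;

// Assumed width of the slab-key field; the real CID_KEY_BITS value is
// not shown in this diff.
const CID_KEY_BITS: u32 = 10;
const MAX_CIDS: usize = 1 << CID_KEY_BITS;
const CID_SEQ_OFFSET: Wrapping<u16> = Wrapping(1 << CID_KEY_BITS);

/// Hypothetical helper: packs a slab key (low bits) and a wrapping
/// sequence counter (high bits) into one NVMe command identifier.
fn pack_cid(slab_key: usize, seq: Wrapping<u16>) -> u16 {
    assert!(slab_key < MAX_CIDS);
    (seq * CID_SEQ_OFFSET).0 | slab_key as u16
}

/// Hypothetical helper: recovers the slab key from a completion's CID;
/// the surviving high bits let the handler reject a CID left over from
/// a previous use of the same slot.
fn slab_key_of(cid: u16) -> usize {
    (cid as usize) & (MAX_CIDS - 1)
}

fn main() {
    let mut next_high_bits = Wrapping(0u16);
    let cid = pack_cid(5, next_high_bits);
    next_high_bits += Wrapping(1); // advance per insertion, wrapping at the field width
    assert_eq!(slab_key_of(cid), 5);
    println!("cid={cid:#06x}");
}
```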
diff --git a/vm/devices/storage/nvme_resources/src/fault.rs b/vm/devices/storage/nvme_resources/src/fault.rs
index 8fcad86766..8ed05989b6 100644
--- a/vm/devices/storage/nvme_resources/src/fault.rs
+++ b/vm/devices/storage/nvme_resources/src/fault.rs
@@ -315,6 +315,42 @@ pub struct CommandMatch {
     pub mask: [u8; 64],
 }
 
+/// A fault configuration for overriding the device's hardware configuration
+/// (i.e. the values reported in PCI configuration space).
+///
+/// When a field is `Some`, its value replaces the corresponding ID in the
+/// NVMe fault controller's PCI hardware IDs. When `None`, the real
+/// hardware ID value is used unchanged. Unlike most other faults in this
+/// module, hardware config faults are applied at device construction time
+/// and are therefore not gated by the `fault_active` flag.
+///
+/// # Example
+/// Override the Vendor ID and Device ID reported in PCI config space.
+/// ```no_run
+/// use mesh::CellUpdater;
+/// use nvme_resources::fault::FaultConfiguration;
+/// use nvme_resources::fault::HardwareConfigFaultConfig;
+///
+/// pub fn hardware_config_fault() -> FaultConfiguration {
+///     let mut fault_start_updater = CellUpdater::new(false);
+///     FaultConfiguration::new(fault_start_updater.cell())
+///         .with_hardware_config_fault(
+///             HardwareConfigFaultConfig::new()
+///                 .with_vendor_id(0x1414)
+///                 .with_device_id(0x00a9),
+///         )
+/// }
+/// ```
+#[derive(Debug, Copy, Clone, Default, MeshPayload)]
+pub struct HardwareConfigFaultConfig {
+    /// Override for the device's PCI Vendor ID. When `None`, the real Vendor
+    /// ID is used.
+    pub vendor_id: Option<u16>,
+    /// Override for the device's PCI Device ID. When `None`, the real Device
+    /// ID is used.
+    pub device_id: Option<u16>,
+}
+
 /// Fault configuration for the NVMe fault controller.
 ///
 /// This struct defines behaviors that inject faults into the NVMe fault controller logic,
@@ -390,6 +426,8 @@ pub struct FaultConfiguration {
     pub namespace_fault: NamespaceFaultConfig,
     /// Fault to apply to all IO queues
     pub io_fault: Arc<IoQueueFaultConfig>,
+    /// Fault to apply to the hardware configuration
+    pub hardware_config_fault: Option<HardwareConfigFaultConfig>,
 }
 
 impl FaultConfiguration {
@@ -404,6 +442,7 @@ impl FaultConfiguration {
             pci_fault: Some(PciFaultConfig::new()),
             namespace_fault: NamespaceFaultConfig::new(mesh::channel().1),
             io_fault: Arc::new(IoQueueFaultConfig::new(fault_active)),
+            hardware_config_fault: None,
         }
     }
 
@@ -430,6 +469,35 @@ impl FaultConfiguration {
         self.namespace_fault = namespace_fault;
         self
     }
+
+    /// Add a hardware config fault configuration to the fault configuration
+    pub fn with_hardware_config_fault(
+        mut self,
+        hardware_config_fault: HardwareConfigFaultConfig,
+    ) -> Self {
+        self.hardware_config_fault = Some(hardware_config_fault);
+        self
+    }
+}
+
+impl HardwareConfigFaultConfig {
+    /// Create a new no-op hardware config fault configuration. All fields
+    /// default to `None`, meaning the real hardware values are used.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Override the PCI Vendor ID reported by the device.
+    pub fn with_vendor_id(mut self, vendor_id: u16) -> Self {
+        self.vendor_id = Some(vendor_id);
+        self
+    }
+
+    /// Override the PCI Device ID reported by the device.
+    pub fn with_device_id(mut self, device_id: u16) -> Self {
+        self.device_id = Some(device_id);
+        self
+    }
 }
 
 impl PciFaultConfig {
diff --git a/vm/devices/storage/nvme_test/src/pci.rs b/vm/devices/storage/nvme_test/src/pci.rs
index 278793d3e8..396951f19b 100644
--- a/vm/devices/storage/nvme_test/src/pci.rs
+++ b/vm/devices/storage/nvme_test/src/pci.rs
@@ -137,10 +137,21 @@ impl NvmeFaultController {
             BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
         );
 
+        // Apply any hardware-config fault overrides for the IDs reported in
+        // PCI configuration space, falling back to the real values when no
+        // override is configured.
+        let hardware_config_fault = fault_configuration.hardware_config_fault.take();
+        let vendor_id = hardware_config_fault
+            .and_then(|f| f.vendor_id)
+            .unwrap_or(VENDOR_ID);
+        let device_id = hardware_config_fault
+            .and_then(|f| f.device_id)
+            .unwrap_or(0x00a9);
+
         let cfg_space = ConfigSpaceType0Emulator::new(
             HardwareIds {
-                vendor_id: VENDOR_ID,
-                device_id: 0x00a9,
+                vendor_id,
+                device_id,
                 revision_id: 0,
                 prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
                 sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index e3c74aa96f..997188c5fe 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -19,6 +19,7 @@ use nvme_resources::NvmeFaultControllerHandle;
 use nvme_resources::fault::AdminQueueFaultBehavior;
 use nvme_resources::fault::AdminQueueFaultConfig;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::HardwareConfigFaultConfig;
 use nvme_resources::fault::IoQueueFaultBehavior;
 use nvme_resources::fault::IoQueueFaultConfig;
 use nvme_resources::fault::NamespaceChange;
@@ -147,9 +148,7 @@ async fn servicing_keepalive_no_device(
 ) -> anyhow::Result<()> {
     let flags = config.default_servicing_flags();
     openhcl_servicing_core(
-        config.with_openhcl_command_line(
-            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-        ),
+        config.with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512"),
         igvm_file,
         flags,
         DEFAULT_SERVICING_COUNT,
@@ -198,9 +197,7 @@ async fn servicing_keepalive_with_device(
     let flags = config.default_servicing_flags();
     openhcl_servicing_core(
         config
-            .with_openhcl_command_line(
-                "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-            )
+            .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512")
            .with_boot_device_type(petri::BootDeviceType::ScsiViaNvme)
            .with_vmbus_redirect(true), // Need this to attach the NVMe device
         igvm_file,
@@ -1169,6 +1166,196 @@ async fn servicing_keepalive_slow_create_io_queue(
     Ok(())
 }
 
+/// Verifies the per-device NVMe keepalive gate.
+///
+/// Two NVMe controllers are attached to a single VM:
+/// * A "c05b" device, whose VPCI instance ID has `data2 = 0xc05b` and whose
+///   derived pci_id is therefore `c05b:00:00.0`. It reports the fault
+///   controller's default PCI vendor/device IDs, which the gate treats as
+///   keepalive-compatible. Keepalive must be honored for this device: its
+///   controller is not reset across servicing, so
+///   `CREATE_IO_COMPLETION_QUEUE` must NOT be issued after servicing. The
+///   fault panics if the opcode is observed.
+/// * A device that uses a hardware-config fault to report vendor ID 0x1414
+///   and device ID 0xb111, the pair the gate treats as
+///   keepalive-incompatible. Keepalive must be downgraded to
+///   reset-on-servicing for this device, so `CREATE_IO_COMPLETION_QUEUE`
+///   MUST be issued after servicing. The fault verifies (via a oneshot)
+///   that the opcode is observed.
+///
+/// Keepalive is enabled VM-wide; the gate is expected to apply per device
+/// based on the PCI vendor/device IDs each device reports.
+#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
+async fn servicing_keepalive_per_device_gate(
+    config: PetriVmBuilder,
+    (igvm_file,): (ResolvedArtifact,),
+) -> Result<(), anyhow::Error> {
+    let mut flags = config.default_servicing_flags();
+    flags.enable_nvme_keepalive = true;
+
+    // VPCI instance IDs. The pci_id seen by the NVMe driver is derived
+    // as `format!("{:04x}:00:00.0", instance_id.data2)` (see
+    // `vpci_path` in `dispatch/vtl2_settings_worker.rs`), so `data2`
+    // controls the prefix of the pci_id. The keepalive device uses
+    // data2 = 0xc05b, so its pci_id is `c05b:00:00.0`; the prefix is only
+    // a naming convenience, since the gate keys off the reported PCI
+    // vendor/device IDs.
+    const WITH_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("00000000-c05b-0000-0000-000000000001");
+    const NO_KEEPALIVE_NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
+
+    const WITH_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID;
+    const NO_KEEPALIVE_NSID: u32 = KEEPALIVE_VTL2_NSID + 1;
+    const WITH_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN;
+    const NO_KEEPALIVE_LUN: u32 = VTL0_NVME_LUN + 1;
+
+    // Two independent fault start cells, one per device.
+    let mut with_keepalive_fault_updater = CellUpdater::new(false);
+    let mut no_keepalive_fault_updater = CellUpdater::new(false);
+
+    let (no_keepalive_create_seen_send, no_keepalive_create_seen_recv) = mesh::oneshot::<()>();
+
+    // c05b device: keepalive must be honored. Fail loudly if any
+    // CREATE_IO_COMPLETION_QUEUE is observed after the fault is armed.
+    let with_keepalive_fault_config = FaultConfiguration::new(with_keepalive_fault_updater.cell())
+        .with_admin_queue_fault(
+            AdminQueueFaultConfig::new().with_submission_queue_fault(
+                CommandMatchBuilder::new()
+                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
+                    .build(),
+                AdminQueueFaultBehavior::Panic(
+                    "c05b device received CREATE_IO_COMPLETION_QUEUE after servicing — \
+                    keepalive should have been honored for this device but the controller \
+                    was reset."
+                        .to_string(),
+                ),
+            ),
+        );
+
+    // Non-c05b device: keepalive must be downgraded, because its
+    // hardware-config fault reports the keepalive-incompatible
+    // vendor/device IDs. Verify CREATE_IO_COMPLETION_QUEUE IS issued
+    // after servicing.
+    let no_keepalive_fault_config = FaultConfiguration::new(no_keepalive_fault_updater.cell())
+        .with_admin_queue_fault(
+            AdminQueueFaultConfig::new().with_submission_queue_fault(
+                CommandMatchBuilder::new()
+                    .match_cdw0_opcode(nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0)
+                    .build(),
+                AdminQueueFaultBehavior::Verify(Some(no_keepalive_create_seen_send)),
+            ),
+        )
+        .with_hardware_config_fault(
+            HardwareConfigFaultConfig::new()
+                .with_vendor_id(0x1414)
+                .with_device_id(0xb111),
+        );
+
+    let scsi_instance = Guid::new_random();
+
+    let (mut vm, agent) = config
+        .with_vmbus_redirect(true)
+        .with_openhcl_command_line(
+            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+        )
+        .modify_backend(move |b| {
+            b.with_custom_config(move |c| {
+                c.vpci_devices.push(VpciDeviceConfig {
+                    vtl: DeviceVtl::Vtl2,
+                    instance_id: NO_KEEPALIVE_NVME_INSTANCE,
+                    resource: NvmeFaultControllerHandle {
+                        subsystem_id: Guid::new_random(),
+                        msix_count: 10,
+                        max_io_queues: 10,
+                        namespaces: vec![NamespaceDefinition {
+                            nsid: NO_KEEPALIVE_NSID,
+                            read_only: false,
+                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
+                                len: Some(DEFAULT_DISK_SIZE),
+                                sector_size: None,
+                            })
+                            .into_resource(),
+                        }],
+                        fault_config: no_keepalive_fault_config,
+                        enable_tdisp_tests: false,
+                    }
+                    .into_resource(),
+                });
+                c.vpci_devices.push(VpciDeviceConfig {
+                    vtl: DeviceVtl::Vtl2,
+                    instance_id: WITH_KEEPALIVE_NVME_INSTANCE,
+                    resource: NvmeFaultControllerHandle {
+                        subsystem_id: Guid::new_random(),
+                        msix_count: 10,
+                        max_io_queues: 10,
+                        namespaces: vec![NamespaceDefinition {
+                            nsid: WITH_KEEPALIVE_NSID,
+                            read_only: false,
+                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
+                                len: Some(DEFAULT_DISK_SIZE),
+                                sector_size: None,
+                            })
+                            .into_resource(),
+                        }],
+                        fault_config: with_keepalive_fault_config,
+                        enable_tdisp_tests: false,
+                    }
+                    .into_resource(),
+                });
+            })
+        })
+        .add_vtl2_storage_controller(
+            Vtl2StorageControllerBuilder::new(ControllerType::Scsi)
+                .with_instance_id(scsi_instance)
+                .add_lun(
+                    Vtl2LunBuilder::disk()
+                        .with_location(NO_KEEPALIVE_LUN)
+                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
+                            ControllerType::Nvme,
+                            NO_KEEPALIVE_NVME_INSTANCE,
+                            NO_KEEPALIVE_NSID,
+                        )),
+                )
+                .add_lun(
+                    Vtl2LunBuilder::disk()
+                        .with_location(WITH_KEEPALIVE_LUN)
+                        .with_physical_device(Vtl2StorageBackingDeviceBuilder::new(
+                            ControllerType::Nvme,
+                            WITH_KEEPALIVE_NVME_INSTANCE,
+                            WITH_KEEPALIVE_NSID,
+                        )),
+                )
+                .build(),
+        )
+        .run()
+        .await?;
+
+    agent.ping().await?;
+
+    // Arm both faults BEFORE servicing so that the post-servicing
+    // CREATE_IO_COMPLETION_QUEUE is what each fault matches.
+    with_keepalive_fault_updater.set(true).await;
+    no_keepalive_fault_updater.set(true).await;
+
+    vm.restart_openhcl(igvm_file.clone(), flags).await?;
+
+    agent.ping().await?;
+
+    // The non-c05b device must issue CREATE_IO_COMPLETION_QUEUE after
+    // servicing because its keepalive was downgraded to reset.
+    CancelContext::new()
+        .with_timeout(Duration::from_secs(60))
+        .until_cancelled(no_keepalive_create_seen_recv)
+        .await
+        .expect(
+            "non-c05b NVMe device did not issue CREATE_IO_COMPLETION_QUEUE within 60s after \
+            servicing — the per-device keepalive gate did not downgrade keepalive for this \
+            device.",
+        )
+        .expect("CREATE_IO_COMPLETION_QUEUE verification on non-c05b device failed");
+
+    // If the c05b device had received CREATE_IO_COMPLETION_QUEUE, the
+    // panic fault would have crashed the VM and failed this test. We
+    // disarm the faults defensively before tearing down.
+    with_keepalive_fault_updater.set(false).await;
+    no_keepalive_fault_updater.set(false).await;
+
+    Ok(())
+}
+
 async fn apply_fault_with_keepalive(
     config: PetriVmBuilder,
     fault_configuration: FaultConfiguration,
@@ -1219,9 +1406,7 @@
     config
         .with_vmbus_redirect(true)
-        .with_openhcl_command_line(
-            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-        )
+        .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512")
        .modify_backend(move |b| {
            b.with_custom_config(|c| {
                // Add a fault controller to test the nvme controller functionality
@@ -1309,9 +1494,9 @@ async fn create_keepalive_test_config_custom(
 ) -> Result<(PetriVm, PipetteClient), anyhow::Error> {
     const NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
 
-    let mut builder = config.with_vmbus_redirect(true).with_openhcl_command_line(
-        "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
-    );
+    let mut builder = config
+        .with_vmbus_redirect(true)
+        .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512");
 
     for cmdline in extra_cmdlines {
         builder = builder.with_openhcl_command_line(cmdline);
diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
index b43f0ec855..593e78dcc6 100644
--- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
+++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs
@@ -193,7 +193,7 @@ async fn nvme_relay_explicit_private_pool(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "OPENHCL_ENABLE_VTL2_GPA_POOL=512",
             expected_props: Some(ExpectedNvmeDeviceProperties {
                 save_restore_supported: true,
                 qsize: 256, // private pool should allow contiguous allocations.
@@ -218,7 +218,7 @@ async fn nvme_relay_heuristic_debug_16vp_768mb_heavy(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "",
             processor_topology: Some(ProcessorTopology::heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(768 * 1024 * 1024),
@@ -246,7 +246,7 @@ async fn nvme_relay_heuristic_release_16vp_256mb_heavy(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "",
             processor_topology: Some(ProcessorTopology::heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(256 * 1024 * 1024),
@@ -277,7 +277,7 @@ async fn nvme_relay_heuristic_release_32vp_500mb_very_heavy(
     nvme_relay_test_core(
         config,
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0",
+            openhcl_cmdline: "",
             processor_topology: Some(ProcessorTopology::very_heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(500 * 1024 * 1024),
@@ -307,7 +307,7 @@ async fn nvme_relay_32vp_768mb_very_heavy(
             ..Default::default()
         }),
         NvmeRelayTestParams {
-            openhcl_cmdline: "OPENHCL_DISABLE_NVME_KEEP_ALIVE=0 OPENHCL_ENABLE_VTL2_GPA_POOL=10240",
+            openhcl_cmdline: "OPENHCL_ENABLE_VTL2_GPA_POOL=10240",
             processor_topology: Some(ProcessorTopology::very_heavy()),
             vtl2_base_address_type: Some(openvmm_defs::config::Vtl2BaseAddressType::Vtl2Allocate {
                 size: Some(768 * 1024 * 1024),
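A closing note on the cmdline flip at the top of this diff: `disable_nvme_keep_alive` now defaults to `false`, and `OPENHCL_DISABLE_NVME_KEEP_ALIVE` becomes a pure opt-out, which is why the `=0` suffix could be dropped from the test command lines above. Below is a standalone sketch of the resulting semantics; it is an illustration, not the boot loader's parser (which matches on the `DISABLE_NVME_KEEP_ALIVE` constant inside `BootCommandLineOptions::parse` alongside the other options).

```rust
/// Sketch of the new OPENHCL_DISABLE_NVME_KEEP_ALIVE semantics: keepalive
/// is enabled unless the flag is present with a value other than "0".
fn disable_nvme_keep_alive(cmdline: &str) -> bool {
    let mut disable = false; // new default: keepalive stays enabled
    for arg in cmdline.split_whitespace() {
        if let Some(value) = arg.strip_prefix("OPENHCL_DISABLE_NVME_KEEP_ALIVE=") {
            if value != "0" {
                disable = true; // explicit opt-out, e.g. "=1"
            }
        }
    }
    disable
}

fn main() {
    assert!(!disable_nvme_keep_alive("")); // default: keepalive on
    assert!(!disable_nvme_keep_alive("OPENHCL_DISABLE_NVME_KEEP_ALIVE=0")); // now a no-op
    assert!(disable_nvme_keep_alive("OPENHCL_DISABLE_NVME_KEEP_ALIVE=1"));
}
```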