From 83ffad44e38bcf94e008562dfa24582ddf6fcbc1 Mon Sep 17 00:00:00 2001 From: Chet Nichols III Date: Tue, 16 Jun 2026 13:59:06 -0700 Subject: [PATCH] fix(site-explorer): enforce the declared DPU mode on fallback-serial matches The per-host matching loop now runs the same NIC/DPU mode check on a DPU paired through `fallback_dpu_serial_numbers` as it does on one the host reports over PCIe: a BlueField in the wrong mode gets `set_nic_mode` and the host is power-cycled to apply it, instead of being trusted as already configured. That reset now fires even when the host BMC never enumerated the DPU over PCIe -- the usual reason we end up on the fallback path -- so the queued flip can actually take effect. Until now the fallback path attached a matched DPU without enforcing the declared mode: it only skipped a device already reporting NIC mode and never issued `set_nic_mode`. On a host the operator declared as `nic_mode`, that DPU would then be dropped (a NIC-mode host has no managed DPUs), so the host registered as zero-DPU with the flip never issued -- the database read "NIC-mode host" while the BlueField stayed in DPU mode. Incomplete PCIe enumeration (a GB200 dropping a DPU from its inventory, say) is exactly what pushes a DPU-to-NIC migration onto this path, so this was the common case, not an edge case. Adds a regression test -- a NIC-mode host whose DPU is paired only by fallback serial and still reporting DPU mode -- that fails on the old code (the host registers zero-DPU with no `set_nic_mode`) and passes now. 
Signed-off-by: Chet Nichols III --- crates/site-explorer/src/lib.rs | 90 +++++++++----- crates/site-explorer/tests/site_explorer.rs | 126 ++++++++++++++++++++ 2 files changed, 186 insertions(+), 30 deletions(-) diff --git a/crates/site-explorer/src/lib.rs b/crates/site-explorer/src/lib.rs index a4e3e75964..30fbd68458 100644 --- a/crates/site-explorer/src/lib.rs +++ b/crates/site-explorer/src/lib.rs @@ -1149,7 +1149,7 @@ impl SiteExplorer { let DpuExplorationState { reported_total: host_reported_dpus_total, running_as_nic_total: mut host_reported_dpus_nic_mode_total, - all_configured: all_dpus_configured_properly_in_host, + all_configured: mut all_dpus_configured_properly_in_host, running_as_dpu: mut dpus_explored_for_host, } = dpu_exploration; @@ -1166,30 +1166,53 @@ impl SiteExplorer { { for dpu_sn in &expected_machine.data.fallback_dpu_serial_numbers { if let Some(dpu_ep) = dpu_sn_to_endpoint.remove(dpu_sn.as_str()) { - // We do not want to attach bluefields that are in NIC mode as DPUs to the host - if is_dpu_in_nic_mode(&dpu_ep, &ep) - && host_reported_dpus_total - .saturating_sub(host_reported_dpus_nic_mode_total) - > 0 - { - host_reported_dpus_nic_mode_total += 1; - continue; - } + // Enforce the host's declared DPU mode on a fallback-serial + // match the same way the host-reported path does, rather than + // trusting it as already-configured. A DPU still in the wrong + // mode gets a `set_nic_mode` here and has to wait for the host + // reset to apply it; without this, a DPU-mode BlueField on a + // `NicMode` host would be attached and then dropped to zero-DPU + // (the `NicMode` arm further down), leaving the database reading + // "NIC-mode host" while the hardware stayed in DPU mode. 
+ let mode_check = Some( + self.check_and_configure_dpu_mode( + &dpu_ep, + dpu_ep.report.model().unwrap_or_default(), + host_dpu_mode, + ) + .await, + ); - // we found at least one DPU from expected machines for this host - // assume that the expected machines is the source of truth. Clear the - // contents of dpus_explored_for_host to discard the previous results of - // iterating over the hosts pcie devices. - if !dpu_added { - dpus_explored_for_host.clear(); + match classify_matched_dpu(&dpu_ep, &ep, mode_check) { + DiscoveredDpu::RunningAsDpu(dpu) => { + // The expected-machine fallback list is the source of + // truth here, so discard whatever the PCIe scan found + // on the first confirmed match. + if !dpu_added { + dpus_explored_for_host.clear(); + } + dpu_added = true; + dpus_explored_for_host.push(dpu); + } + DiscoveredDpu::RunningAsNic => { + host_reported_dpus_nic_mode_total += 1; + } + DiscoveredDpu::NeedsReconfig => { + // `set_nic_mode` was just issued; the host needs a + // reset before this DPU re-reports in the new mode, so + // mark it not-yet-configured and let the reset path + // below run. + all_dpus_configured_properly_in_host = false; + } + DiscoveredDpu::ModeCheckFailed(err) => { + tracing::warn!( + dpu = %dpu_ep.address, + dpu_sn = %dpu_sn, + error = %err, + "failed to check fallback-matched DPU mode; skipping this device this pass", + ); + } } - - dpu_added = true; - dpus_explored_for_host.push(ExploredDpu { - bmc_ip: dpu_ep.address, - host_pf_mac_address: get_host_pf_mac_address(&dpu_ep), - report: dpu_ep.report.into(), - }); } } } @@ -1203,13 +1226,20 @@ impl SiteExplorer { // confirmed to be running as plain NICs. 
let expected_managed_dpus_total = host_reported_dpus_total.saturating_sub(host_reported_dpus_nic_mode_total); - if expected_managed_dpus_total > 0 { - tracing::warn!( - address = %ep.address, - exploration_report = ?ep, - "cannot identify managed host because the site explorer has only discovered {} out of the {} attached DPUs (all_dpus_configured_properly_in_host={all_dpus_configured_properly_in_host}):\n{:#?}", - dpus_explored_for_host.len(), expected_managed_dpus_total, dpus_explored_for_host - ); + // Enter the reset/wait path when DPUs are still expected to pair, or + // when a `set_nic_mode` was just issued -- a fallback-serial match can + // queue a flip even on a host whose BMC reports no DPU over PCIe + // (`expected_managed_dpus_total == 0`), which is the usual reason we are + // on the fallback path at all. + if expected_managed_dpus_total > 0 || !all_dpus_configured_properly_in_host { + if expected_managed_dpus_total > 0 { + tracing::warn!( + address = %ep.address, + exploration_report = ?ep, + "cannot identify managed host because the site explorer has only discovered {} out of the {} attached DPUs (all_dpus_configured_properly_in_host={all_dpus_configured_properly_in_host}):\n{:#?}", + dpus_explored_for_host.len(), expected_managed_dpus_total, dpus_explored_for_host + ); + } if !all_dpus_configured_properly_in_host { // A queued `set_nic_mode` only takes effect after a host diff --git a/crates/site-explorer/tests/site_explorer.rs b/crates/site-explorer/tests/site_explorer.rs index 8275780854..f62f085d8e 100644 --- a/crates/site-explorer/tests/site_explorer.rs +++ b/crates/site-explorer/tests/site_explorer.rs @@ -2606,6 +2606,132 @@ async fn test_site_explorer_power_cycles_non_dell_host_to_apply_nic_mode( Ok(()) } +/// Regression guard for the fallback-serial path (#2631): a DPU paired only +/// through `fallback_dpu_serial_numbers` must get the same NIC-mode enforcement +/// as a host-reported one. 
The host BMC here enumerates no DPU over PCIe -- the +/// usual reason the fallback exists (e.g. a GB200 that drops a DPU from its +/// inventory) -- so the only link is the operator-listed serial, and the DPU is +/// still reporting DPU mode against a `NicMode` host. +/// +/// Before the fix the fallback path trusted the match as already-configured: it +/// attached the DPU without a mode check, then dropped it to zero-DPU, so the +/// host registered as a NIC-mode host while the BlueField stayed in DPU mode and +/// `set_nic_mode` was never issued. Now the flip is issued, the host is +/// power-cycled to apply it, and the host waits instead of settling this pass. +#[sqlx_test] +async fn test_site_explorer_enforces_nic_mode_on_fallback_serial_match( + pool: PgPool, +) -> Result<(), Box> { + use model::expected_machine::{DpuMode, ExpectedMachine, ExpectedMachineData}; + use model::site_explorer::NicMode; + + let env = Env::new(pool).await; + + const FALLBACK_DPU_SERIAL: &str = "fallback-only-dpu-serial"; + // DPU reports DPU mode; the host report carries no DPU device, so the + // serial is the only thing that can pair them. + let dpu_config = DpuConfig { + nic_mode: Some(NicMode::Dpu), + serial: FALLBACK_DPU_SERIAL.to_string(), + ..DpuConfig::default() + }; + let mock_host = ManagedHostConfig::default(); + let host_bmc_mac = mock_host.bmc_mac_address; + + // Operator declares the host NIC mode and lists the DPU's serial as a + // pairing fallback. 
+ let mut txn = env.pool.begin().await?; + db::expected_machine::create( + &mut txn, + ExpectedMachine { + id: None, + bmc_mac_address: host_bmc_mac, + data: ExpectedMachineData { + bmc_username: "ADMIN".to_string(), + bmc_password: "PASS".to_string(), + serial_number: "EM-2631-FALLBACK-NIC".to_string(), + metadata: model::metadata::Metadata::new_with_default_name(), + dpu_mode: DpuMode::NicMode, + fallback_dpu_serial_numbers: vec![FALLBACK_DPU_SERIAL.to_string()], + ..Default::default() + }, + }, + ) + .await?; + txn.commit().await?; + + let mut host_bmc = env.new_machine(&host_bmc_mac.to_string(), "SomeVendor"); + let mut dpu_bmc = env.new_machine(&dpu_config.bmc_mac_address.to_string(), "NVIDIA/BF/BMC"); + host_bmc.discover_dhcp(env.api()).await?; + dpu_bmc.discover_dhcp(env.api()).await?; + + let explorer_config = SiteExplorerConfig { + enabled: Arc::new(true.into()), + retained_boot_interface_window: None, + explorations_per_run: 10, + concurrent_explorations: 1, + run_interval: std::time::Duration::from_secs(1), + create_machines: Arc::new(true.into()), + ..Default::default() + }; + let explorer = env.test_site_explorer(explorer_config); + explorer.insert_endpoint_results(vec![ + (dpu_bmc.ip.parse().unwrap(), Ok(dpu_config.clone().into())), + (host_bmc.ip.parse().unwrap(), Ok(mock_host.into())), + ]); + + // First iteration: initial endpoint exploration. + explorer.run_single_iteration().await.unwrap(); + let mut txn = env.pool.begin().await?; + for ip in [host_bmc.ip.parse()?, dpu_bmc.ip.parse()?] { + db::explored_endpoints::set_preingestion_complete(ip, &mut txn).await?; + } + txn.commit().await?; + // Second iteration: per-host matching falls through to the fallback-serial + // path, which must enforce the declared NIC mode. 
+ explorer.run_single_iteration().await.unwrap(); + + { + let calls = explorer + .endpoint_explorer() + .set_nic_mode_calls + .lock() + .unwrap(); + assert!( + calls.iter().any(|(_, mode)| *mode == NicMode::Nic), + "fallback-matched DPU on a NicMode host should get set_nic_mode(Nic); calls so far: {calls:?}" + ); + } + + // The host must not settle as a zero-DPU managed host until the flip has + // applied -- otherwise the database reads "NIC-mode host" while the + // BlueField is still physically in DPU mode. + let explored_managed_hosts = db::explored_managed_host::find_all(&env.pool).await?; + assert!( + explored_managed_hosts.is_empty(), + "host should wait for the queued NIC-mode flip to apply, not register as zero-DPU this pass" + ); + + // The reset path fires even though the host BMC never enumerated the DPU + // over PCIe (`expected_managed_dpus_total == 0`), so the queued flip can + // actually apply. + { + let power_calls = explorer + .endpoint_explorer() + .redfish_power_control_calls + .lock() + .unwrap(); + assert!( + power_calls + .iter() + .any(|(_, action)| matches!(action, libredfish::SystemPowerControl::PowerCycle)), + "host should be power-cycled to apply the queued NIC-mode flip; power calls so far: {power_calls:?}" + ); + } + + Ok(()) +} + /// A managed host's DPU-facing `machine_interface` is created (via DHCP) with /// just a MAC and no `boot_interface_id`. The exploration that ingests the host /// then backfills the vendor-specific Redfish interface id onto that row, matched