Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 60 additions & 30 deletions crates/site-explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ impl SiteExplorer {
let DpuExplorationState {
reported_total: host_reported_dpus_total,
running_as_nic_total: mut host_reported_dpus_nic_mode_total,
all_configured: all_dpus_configured_properly_in_host,
all_configured: mut all_dpus_configured_properly_in_host,
running_as_dpu: mut dpus_explored_for_host,
} = dpu_exploration;

Expand All @@ -1166,30 +1166,53 @@ impl SiteExplorer {
{
for dpu_sn in &expected_machine.data.fallback_dpu_serial_numbers {
if let Some(dpu_ep) = dpu_sn_to_endpoint.remove(dpu_sn.as_str()) {
// We do not want to attach bluefields that are in NIC mode as DPUs to the host
if is_dpu_in_nic_mode(&dpu_ep, &ep)
&& host_reported_dpus_total
.saturating_sub(host_reported_dpus_nic_mode_total)
> 0
{
host_reported_dpus_nic_mode_total += 1;
continue;
}
// Enforce the host's declared DPU mode on a fallback-serial
// match the same way the host-reported path does, rather than
// trusting it as already-configured. A DPU still in the wrong
// mode gets a `set_nic_mode` here and has to wait for the host
// reset to apply it; without this, a DPU-mode BlueField on a
// `NicMode` host would be attached and then dropped to zero-DPU
// (the `NicMode` arm further down), leaving the database reading
// "NIC-mode host" while the hardware stayed in DPU mode.
let mode_check = Some(
self.check_and_configure_dpu_mode(
&dpu_ep,
dpu_ep.report.model().unwrap_or_default(),
host_dpu_mode,
)
.await,
);

// we found at least one DPU from expected machines for this host
// assume that the expected machines is the source of truth. Clear the
// contents of dpus_explored_for_host to discard the previous results of
// iterating over the hosts pcie devices.
if !dpu_added {
dpus_explored_for_host.clear();
match classify_matched_dpu(&dpu_ep, &ep, mode_check) {
DiscoveredDpu::RunningAsDpu(dpu) => {
// The expected-machine fallback list is the source of
// truth here, so discard whatever the PCIe scan found
// on the first confirmed match.
if !dpu_added {
dpus_explored_for_host.clear();
}
dpu_added = true;
dpus_explored_for_host.push(dpu);
}
DiscoveredDpu::RunningAsNic => {
host_reported_dpus_nic_mode_total += 1;
}
DiscoveredDpu::NeedsReconfig => {
// `set_nic_mode` was just issued; the host needs a
// reset before this DPU re-reports in the new mode, so
// mark it not-yet-configured and let the reset path
// below run.
all_dpus_configured_properly_in_host = false;
}
DiscoveredDpu::ModeCheckFailed(err) => {
tracing::warn!(
dpu = %dpu_ep.address,
dpu_sn = %dpu_sn,
error = %err,
"failed to check fallback-matched DPU mode; skipping this device this pass",
);
}
}

dpu_added = true;
dpus_explored_for_host.push(ExploredDpu {
bmc_ip: dpu_ep.address,
host_pf_mac_address: get_host_pf_mac_address(&dpu_ep),
report: dpu_ep.report.into(),
});
}
}
}
Expand All @@ -1203,13 +1226,20 @@ impl SiteExplorer {
// confirmed to be running as plain NICs.
let expected_managed_dpus_total =
host_reported_dpus_total.saturating_sub(host_reported_dpus_nic_mode_total);
if expected_managed_dpus_total > 0 {
tracing::warn!(
address = %ep.address,
exploration_report = ?ep,
"cannot identify managed host because the site explorer has only discovered {} out of the {} attached DPUs (all_dpus_configured_properly_in_host={all_dpus_configured_properly_in_host}):\n{:#?}",
dpus_explored_for_host.len(), expected_managed_dpus_total, dpus_explored_for_host
);
// Enter the reset/wait path when DPUs are still expected to pair, or
// when a `set_nic_mode` was just issued -- a fallback-serial match can
// queue a flip even on a host whose BMC reports no DPU over PCIe
// (`expected_managed_dpus_total == 0`), which is the usual reason we are
// on the fallback path at all.
if expected_managed_dpus_total > 0 || !all_dpus_configured_properly_in_host {
if expected_managed_dpus_total > 0 {
tracing::warn!(
address = %ep.address,
exploration_report = ?ep,
"cannot identify managed host because the site explorer has only discovered {} out of the {} attached DPUs (all_dpus_configured_properly_in_host={all_dpus_configured_properly_in_host}):\n{:#?}",
dpus_explored_for_host.len(), expected_managed_dpus_total, dpus_explored_for_host
);
}

if !all_dpus_configured_properly_in_host {
// A queued `set_nic_mode` only takes effect after a host
Expand Down
126 changes: 126 additions & 0 deletions crates/site-explorer/tests/site_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2606,6 +2606,132 @@ async fn test_site_explorer_power_cycles_non_dell_host_to_apply_nic_mode(
Ok(())
}

/// Regression test for #2631: NIC-mode enforcement must also apply to a DPU
/// that is paired with its host solely via `fallback_dpu_serial_numbers`.
///
/// Scenario: the host BMC lists no DPU over PCIe (the typical reason the
/// fallback list is used at all, e.g. a GB200 that drops a DPU from its
/// inventory), the operator declares the host as `NicMode` and lists the DPU's
/// serial as a fallback, and the DPU itself still reports DPU mode.
///
/// Previously the fallback path treated such a match as already configured:
/// the DPU was attached without a mode check and then dropped to zero-DPU, so
/// the host was recorded as NIC-mode while the BlueField remained in DPU mode
/// and `set_nic_mode` was never sent. The expected behavior, asserted below, is
/// that the flip is issued, the host is power-cycled so it can apply, and the
/// host does not settle as a managed host during this pass.
#[sqlx_test]
async fn test_site_explorer_enforces_nic_mode_on_fallback_serial_match(
    pool: PgPool,
) -> Result<(), Box<dyn std::error::Error>> {
    use model::{
        expected_machine::{DpuMode, ExpectedMachine, ExpectedMachineData},
        site_explorer::NicMode,
    };

    const FALLBACK_DPU_SERIAL: &str = "fallback-only-dpu-serial";

    let env = Env::new(pool).await;

    // The DPU advertises DPU mode. The host report has no DPU device in it,
    // which leaves the serial number as the only possible pairing link.
    let fallback_dpu = DpuConfig {
        nic_mode: Some(NicMode::Dpu),
        serial: FALLBACK_DPU_SERIAL.to_string(),
        ..DpuConfig::default()
    };
    let host_config = ManagedHostConfig::default();
    let host_bmc_mac = host_config.bmc_mac_address;

    // Expected-machine record: the operator marks the host as NIC mode and
    // supplies the DPU serial as a pairing fallback.
    let mut txn = env.pool.begin().await?;
    let expected_machine = ExpectedMachine {
        id: None,
        bmc_mac_address: host_bmc_mac,
        data: ExpectedMachineData {
            bmc_username: "ADMIN".to_string(),
            bmc_password: "PASS".to_string(),
            serial_number: "EM-2631-FALLBACK-NIC".to_string(),
            metadata: model::metadata::Metadata::new_with_default_name(),
            dpu_mode: DpuMode::NicMode,
            fallback_dpu_serial_numbers: vec![FALLBACK_DPU_SERIAL.to_string()],
            ..Default::default()
        },
    };
    db::expected_machine::create(&mut txn, expected_machine).await?;
    txn.commit().await?;

    let mut host_bmc = env.new_machine(&host_bmc_mac.to_string(), "SomeVendor");
    let mut dpu_bmc = env.new_machine(&fallback_dpu.bmc_mac_address.to_string(), "NVIDIA/BF/BMC");
    host_bmc.discover_dhcp(env.api()).await?;
    dpu_bmc.discover_dhcp(env.api()).await?;

    let explorer = env.test_site_explorer(SiteExplorerConfig {
        enabled: Arc::new(true.into()),
        create_machines: Arc::new(true.into()),
        retained_boot_interface_window: None,
        explorations_per_run: 10,
        concurrent_explorations: 1,
        run_interval: std::time::Duration::from_secs(1),
        ..Default::default()
    });
    explorer.insert_endpoint_results(vec![
        (dpu_bmc.ip.parse().unwrap(), Ok(fallback_dpu.clone().into())),
        (host_bmc.ip.parse().unwrap(), Ok(host_config.into())),
    ]);

    // Pass 1 performs the initial endpoint exploration.
    explorer.run_single_iteration().await.unwrap();

    // Mark both endpoints as done with preingestion before the next pass.
    let mut txn = env.pool.begin().await?;
    for ip in [host_bmc.ip.parse()?, dpu_bmc.ip.parse()?] {
        db::explored_endpoints::set_preingestion_complete(ip, &mut txn).await?;
    }
    txn.commit().await?;

    // Pass 2 does the per-host matching, which ends up on the fallback-serial
    // path; that path has to enforce the declared NIC mode.
    explorer.run_single_iteration().await.unwrap();

    // Scoped so the mutex guard is released before the next `.await`.
    {
        let calls = explorer
            .endpoint_explorer()
            .set_nic_mode_calls
            .lock()
            .unwrap();
        let nic_flip_requested = calls.iter().any(|(_, mode)| *mode == NicMode::Nic);
        assert!(
            nic_flip_requested,
            "fallback-matched DPU on a NicMode host should get set_nic_mode(Nic); calls so far: {calls:?}"
        );
    }

    // Until the flip has taken effect the host must not be recorded as a
    // zero-DPU managed host; doing so would leave the database claiming a
    // NIC-mode host while the BlueField is physically still in DPU mode.
    let settled_hosts = db::explored_managed_host::find_all(&env.pool).await?;
    assert!(
        settled_hosts.is_empty(),
        "host should wait for the queued NIC-mode flip to apply, not register as zero-DPU this pass"
    );

    // Even with no DPU enumerated over PCIe by the host BMC
    // (`expected_managed_dpus_total == 0`), the reset path has to run so the
    // queued flip can actually be applied.
    {
        let power_calls = explorer
            .endpoint_explorer()
            .redfish_power_control_calls
            .lock()
            .unwrap();
        let power_cycled = power_calls
            .iter()
            .any(|(_, action)| matches!(action, libredfish::SystemPowerControl::PowerCycle));
        assert!(
            power_cycled,
            "host should be power-cycled to apply the queued NIC-mode flip; power calls so far: {power_calls:?}"
        );
    }

    Ok(())
}

/// A managed host's DPU-facing `machine_interface` is created (via DHCP) with
/// just a MAC and no `boot_interface_id`. The exploration that ingests the host
/// then backfills the vendor-specific Redfish interface id onto that row, matched
Expand Down
Loading