Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 145 additions & 3 deletions src/ibverbs/device_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ use std::mem::MaybeUninit;
use std::ptr::{self, NonNull};
use std::sync::Arc;

use bitmask_enum::bitmask;
use rdma_mummy_sys::{
ibv_alloc_pd, ibv_close_device, ibv_context, ibv_device_attr_ex, ibv_get_device_guid, ibv_get_device_name,
ibv_gid_entry, ibv_mtu, ibv_port_attr, ibv_port_state, ibv_query_device_ex, ibv_query_gid, ibv_query_gid_ex,
ibv_query_gid_table, ibv_query_gid_type, ibv_query_port, IBV_GID_TYPE_IB, IBV_GID_TYPE_ROCE_V1,
IBV_GID_TYPE_ROCE_V2, IBV_GID_TYPE_SYSFS_IB_ROCE_V1, IBV_GID_TYPE_SYSFS_ROCE_V2, IBV_LINK_LAYER_ETHERNET,
IBV_LINK_LAYER_INFINIBAND, IBV_LINK_LAYER_UNSPECIFIED,
ibv_query_gid_table, ibv_query_gid_type, ibv_query_port, ibv_query_rt_values_ex, ibv_values_ex, ibv_values_mask,
IBV_GID_TYPE_IB, IBV_GID_TYPE_ROCE_V1, IBV_GID_TYPE_ROCE_V2, IBV_GID_TYPE_SYSFS_IB_ROCE_V1,
IBV_GID_TYPE_SYSFS_ROCE_V2, IBV_LINK_LAYER_ETHERNET, IBV_LINK_LAYER_INFINIBAND, IBV_LINK_LAYER_UNSPECIFIED,
};
use serde::{Deserialize, Serialize};

Expand All @@ -22,6 +23,22 @@ use super::completion::{CompletionChannel, CompletionQueueBuilder, CreateComplet
use super::device::{DeviceInfo, TransportType};
use super::protection_domain::ProtectionDomain;

/// Error returned by [`DeviceContext::query_rt_values_ex`] for querying real-time values.
///
/// This is a thin wrapper: its display message is always `failed to query RT values`, while the
/// concrete cause is the wrapped [`QueryRealTimeValuesErrorKind`], exposed as the public field `0`.
/// A [`QueryRealTimeValuesErrorKind`] converts into this type through [`From`].
#[derive(Debug, thiserror::Error)]
#[error("failed to query RT values")]
#[non_exhaustive]
pub struct QueryRealTimeValuesError(#[from] pub QueryRealTimeValuesErrorKind);

/// The enum type for [`QueryRealTimeValuesError`].
///
/// Describes why [`DeviceContext::query_rt_values_ex`] failed. The enum is `#[non_exhaustive]`,
/// so matches on it need a wildcard arm.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
#[non_exhaustive]
pub enum QueryRealTimeValuesErrorKind {
/// The underlying verbs call returned a non-zero error code other than `EOPNOTSUPP`; the code
/// is carried as an [`io::Error`] and displayed transparently.
Ibverbs(#[from] io::Error),
/// The underlying verbs call returned `EOPNOTSUPP`.
#[error("operation not supported by driver")]
NotSupported,
}

/// Error returned by [`DeviceContext::alloc_pd`] for allocating a new RDMA PD.
#[derive(Debug, thiserror::Error)]
#[error("failed to alloc protection domain")]
Expand Down Expand Up @@ -106,6 +123,65 @@ pub enum QueryGidErrorKind {
Ibverbs(#[from] io::Error),
}

/// Bitmask of values to request (or that were returned) by [`DeviceContext::query_rt_values_ex`].
///
/// Set the desired bits before calling [`DeviceContext::query_rt_values_ex`]; on success the
/// returned [`RealTimeValues::comp_mask`] indicates which fields were actually populated by the driver.
///
/// The mask is backed by a `u32`, and each variant takes its value from the corresponding
/// `ibv_values_mask` constant of the C bindings.
#[bitmask(u32)]
#[bitmask_config(vec_debug)]
pub enum ValuesMask {
/// Query / indicates the raw hardware clock value ([`RealTimeValues::raw_clock`]).
// `.0` reads the integer inside the binding's newtype constant; `as _` casts it to the
// integer type expected for the discriminant (presumably the `u32` declared above).
RawClock = ibv_values_mask::IBV_VALUES_MASK_RAW_CLOCK.0 as _,
}

/// Snapshot of the device's raw hardware clock, as produced by
/// [`DeviceContext::query_rt_values_ex`].
///
/// The value is a free-running tick counter expressed in device-specific units and split into a
/// high and a low 64-bit half. Although the C API transports it in a `timespec`, the halves are
/// **not** seconds and nanoseconds. To obtain real time, merge both halves (see
/// [`RawClock::to_ticks`]) and divide by the device clock frequency (`hca_core_clock` from
/// `ibv_query_device_ex`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RawClock {
    /// Upper half of the tick counter (carried in `timespec.tv_sec` by the C struct).
    pub counter_hi: u64,
    /// Lower half of the tick counter (carried in `timespec.tv_nsec` by the C struct).
    pub counter_lo: u64,
}

impl RawClock {
    /// Merge the high and low halves into one 128-bit tick count.
    ///
    /// `counter_hi` occupies the upper 64 bits of the result and `counter_lo` the lower 64 bits.
    pub fn to_ticks(&self) -> u128 {
        let upper = u128::from(self.counter_hi) << u64::BITS;
        let lower = u128::from(self.counter_lo);
        upper | lower
    }
}

/// Real-time values queried from an RDMA device via [`DeviceContext::query_rt_values_ex`].
pub struct RealTimeValues {
inner: ibv_values_ex,
}

impl RealTimeValues {
    /// Raw hardware clock counter reported by the driver.
    ///
    /// Yields [`None`] when [`ValuesMask::RawClock`] is absent from
    /// [`RealTimeValues::comp_mask`], meaning the driver did not fill in the clock field.
    ///
    /// Refer to [`RawClock`] for how the returned counter should be interpreted.
    pub fn raw_clock(&self) -> Option<RawClock> {
        let populated = self.comp_mask().contains(ValuesMask::RawClock);
        populated.then(|| RawClock {
            counter_hi: self.inner.raw_clock.tv_sec as u64,
            counter_lo: self.inner.raw_clock.tv_nsec as u64,
        })
    }

    /// The `comp_mask` stored in the underlying structure, telling which fields the driver
    /// actually populated.
    pub fn comp_mask(&self) -> ValuesMask {
        self.inner.comp_mask.into()
    }
}

/// A Globally Unique Identifier (GUID) for the RDMA device. Usually assigned to the device by its
/// vendor during manufacturing; it may contain part of the MAC address of the Ethernet device.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
Expand Down Expand Up @@ -682,6 +758,42 @@ impl DeviceContext {
Ok(entries)
}

/// Query real-time values from the RDMA device.
///
/// Set bits in `mask` to request which values to retrieve. Currently the only defined bit is
/// [`ValuesMask::RawClock`], which retrieves the device's free-running hardware clock — useful
/// for correlating CQ completion timestamps with wall-clock time.
///
/// Returns [`QueryRealTimeValuesErrorKind::NotSupported`] if the driver does not implement this
/// operation.
///
/// # Example
///
/// ```no_run
/// use sideway::ibverbs::device::DeviceList;
/// use sideway::ibverbs::device_context::ValuesMask;
///
/// let device_list = DeviceList::new().unwrap();
/// let device = device_list.get(0).unwrap();
/// let context = device.open().unwrap();
///
/// let rt = context.query_rt_values_ex(ValuesMask::RawClock).unwrap();
/// println!("HW clock: {:?}", rt.raw_clock());
/// ```
pub fn query_rt_values_ex(&self, mask: ValuesMask) -> Result<RealTimeValues, QueryRealTimeValuesError> {
let mut values = std::mem::MaybeUninit::<ibv_values_ex>::uninit();
unsafe {
(*values.as_mut_ptr()).comp_mask = mask.bits();
match ibv_query_rt_values_ex(self.context.as_ptr(), values.as_mut_ptr()) {
0 => Ok(RealTimeValues {
inner: values.assume_init(),
}),
ret if ret == libc::EOPNOTSUPP => Err(QueryRealTimeValuesErrorKind::NotSupported.into()),
ret => Err(QueryRealTimeValuesErrorKind::Ibverbs(io::Error::from_raw_os_error(ret)).into()),
}
}
}

/// # Safety
///
/// Return the handle of device context.
Expand Down Expand Up @@ -725,6 +837,36 @@ mod tests {
use super::*;
use crate::ibverbs::device::{self, DeviceInfo};

/// Queries the raw clock on every available device. A device either reports a populated,
/// non-zero clock or fails with `NotSupported`; any other error fails the test.
#[test]
fn test_query_rt_values_ex() -> Result<(), Box<dyn std::error::Error>> {
    let devices = device::DeviceList::new()?;

    for dev in &devices {
        let context = dev.open().unwrap();

        let values = match context.query_rt_values_ex(ValuesMask::RawClock) {
            Ok(values) => values,
            Err(e) => {
                // Some drivers / simulators do not implement the query; only that is tolerated.
                assert!(
                    matches!(e.0, QueryRealTimeValuesErrorKind::NotSupported),
                    "unexpected error: {e}"
                );
                continue;
            },
        };

        // A driver that supports the query must flag the clock as populated.
        assert!(values.comp_mask().contains(ValuesMask::RawClock));

        let clock = values
            .raw_clock()
            .expect("RawClock bit set but raw_clock() returned None");

        // The counter of a running device is expected to have advanced past zero.
        assert!(
            (clock.counter_hi | clock.counter_lo) != 0,
            "raw clock counter should be non-zero"
        );
    }

    Ok(())
}

#[test]
fn test_mtu_conversion() {
assert_eq!(Mtu::from(ibv_mtu::IBV_MTU_256), Mtu::Mtu256);
Expand Down
Loading