diff --git a/src/ibverbs/device_context.rs b/src/ibverbs/device_context.rs
index f1c8802..d29a2cf 100644
--- a/src/ibverbs/device_context.rs
+++ b/src/ibverbs/device_context.rs
@@ -8,12 +8,13 @@ use std::mem::MaybeUninit;
 use std::ptr::{self, NonNull};
 use std::sync::Arc;
 
+use bitmask_enum::bitmask;
 use rdma_mummy_sys::{
     ibv_alloc_pd, ibv_close_device, ibv_context, ibv_device_attr_ex, ibv_get_device_guid, ibv_get_device_name,
     ibv_gid_entry, ibv_mtu, ibv_port_attr, ibv_port_state, ibv_query_device_ex, ibv_query_gid, ibv_query_gid_ex,
-    ibv_query_gid_table, ibv_query_gid_type, ibv_query_port, IBV_GID_TYPE_IB, IBV_GID_TYPE_ROCE_V1,
-    IBV_GID_TYPE_ROCE_V2, IBV_GID_TYPE_SYSFS_IB_ROCE_V1, IBV_GID_TYPE_SYSFS_ROCE_V2, IBV_LINK_LAYER_ETHERNET,
-    IBV_LINK_LAYER_INFINIBAND, IBV_LINK_LAYER_UNSPECIFIED,
+    ibv_query_gid_table, ibv_query_gid_type, ibv_query_port, ibv_query_rt_values_ex, ibv_values_ex, ibv_values_mask,
+    IBV_GID_TYPE_IB, IBV_GID_TYPE_ROCE_V1, IBV_GID_TYPE_ROCE_V2, IBV_GID_TYPE_SYSFS_IB_ROCE_V1,
+    IBV_GID_TYPE_SYSFS_ROCE_V2, IBV_LINK_LAYER_ETHERNET, IBV_LINK_LAYER_INFINIBAND, IBV_LINK_LAYER_UNSPECIFIED,
 };
 use serde::{Deserialize, Serialize};
 
@@ -22,6 +23,24 @@ use super::completion::{CompletionChannel, CompletionQueueBuilder, CreateComplet
 use super::device::{DeviceInfo, TransportType};
 use super::protection_domain::ProtectionDomain;
 
+/// Error returned by [`DeviceContext::query_rt_values_ex`] for querying real-time values.
+#[derive(Debug, thiserror::Error)]
+#[error("failed to query RT values")]
+#[non_exhaustive]
+pub struct QueryRealTimeValuesError(#[from] pub QueryRealTimeValuesErrorKind);
+
+/// The enum type for [`QueryRealTimeValuesError`].
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+#[non_exhaustive]
+pub enum QueryRealTimeValuesErrorKind {
+    /// The ibverbs call returned a non-zero code; wraps the OS error built from that code.
+    Ibverbs(#[from] io::Error),
+    /// The ibverbs call returned `EOPNOTSUPP`, i.e. the driver does not implement the query.
+    #[error("operation not supported by driver")]
+    NotSupported,
+}
+
 /// Error returned by [`DeviceContext::alloc_pd`] for allocating a new RDMA PD.
 #[derive(Debug, thiserror::Error)]
 #[error("failed to alloc protection domain")]
@@ -106,6 +125,65 @@ pub enum QueryGidErrorKind {
     Ibverbs(#[from] io::Error),
 }
 
+/// Bitmask of values to request (or that were returned) by [`DeviceContext::query_rt_values_ex`].
+///
+/// Set the desired bits before calling [`DeviceContext::query_rt_values_ex`]; on success the
+/// returned [`RealTimeValues::comp_mask`] indicates which fields were actually populated by the driver.
+#[bitmask(u32)]
+#[bitmask_config(vec_debug)]
+pub enum ValuesMask {
+    /// Query / indicates the raw hardware clock value ([`RealTimeValues::raw_clock`]).
+    RawClock = ibv_values_mask::IBV_VALUES_MASK_RAW_CLOCK.0 as _,
+}
+
+/// Raw hardware clock counter returned by [`DeviceContext::query_rt_values_ex`].
+///
+/// The two fields are the high and low halves of a free-running hardware tick counter in
+/// device-specific units. They are **not** wall-clock seconds and nanoseconds despite the
+/// underlying C `timespec` field names. To convert to real time, combine the parts and divide
+/// by the device clock frequency (`hca_core_clock` from `ibv_query_device_ex`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct RawClock {
+    /// High part of the hardware counter (maps to `timespec.tv_sec` in the C struct).
+    pub counter_hi: u64,
+    /// Low part of the hardware counter (maps to `timespec.tv_nsec` in the C struct).
+    pub counter_lo: u64,
+}
+
+impl RawClock {
+    /// Combine both halves into a single 128-bit tick value.
+    pub fn to_ticks(&self) -> u128 {
+        ((self.counter_hi as u128) << 64) | (self.counter_lo as u128)
+    }
+}
+
+/// Real-time values queried from an RDMA device via [`DeviceContext::query_rt_values_ex`].
+pub struct RealTimeValues {
+    inner: ibv_values_ex,
+}
+
+impl RealTimeValues {
+    /// Returns the raw hardware clock counter, or [`None`] if [`ValuesMask::RawClock`] was not
+    /// set in [`RealTimeValues::comp_mask`] (i.e. the driver did not populate this field).
+    ///
+    /// See [`RawClock`] for details on how to interpret the returned value.
+    pub fn raw_clock(&self) -> Option<RawClock> {
+        if self.comp_mask().contains(ValuesMask::RawClock) {
+            Some(RawClock {
+                counter_hi: self.inner.raw_clock.tv_sec as u64,
+                counter_lo: self.inner.raw_clock.tv_nsec as u64,
+            })
+        } else {
+            None
+        }
+    }
+
+    /// Returns the `comp_mask` indicating which fields were actually populated by the driver.
+    pub fn comp_mask(&self) -> ValuesMask {
+        ValuesMask::from(self.inner.comp_mask)
+    }
+}
+
 /// A Global Unique Indentifier (GUID) for the RDMA device. Usually assigned to the device by its
 /// vendor during the manufacturing, may contain part of the MAC address on the ethernet device.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
@@ -682,6 +760,54 @@ impl DeviceContext {
         Ok(entries)
     }
 
+    /// Query real-time values from the RDMA device.
+    ///
+    /// Set bits in `mask` to request which values to retrieve. Currently the only defined bit is
+    /// [`ValuesMask::RawClock`], which retrieves the device's free-running hardware clock — useful
+    /// for correlating CQ completion timestamps with wall-clock time.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`QueryRealTimeValuesErrorKind::NotSupported`] if the driver does not implement this
+    /// operation, and [`QueryRealTimeValuesErrorKind::Ibverbs`] wrapping the returned error code for
+    /// any other non-zero return value.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use sideway::ibverbs::device::DeviceList;
+    /// use sideway::ibverbs::device_context::ValuesMask;
+    ///
+    /// let device_list = DeviceList::new().unwrap();
+    /// let device = device_list.get(0).unwrap();
+    /// let context = device.open().unwrap();
+    ///
+    /// let rt = context.query_rt_values_ex(ValuesMask::RawClock).unwrap();
+    /// println!("HW clock: {:?}", rt.raw_clock());
+    /// ```
+    pub fn query_rt_values_ex(&self, mask: ValuesMask) -> Result<RealTimeValues, QueryRealTimeValuesError> {
+        // Start from zeroed storage rather than uninitialized memory: the driver is only expected
+        // to fill the fields it reports in `comp_mask`, so anything it skips must already hold a
+        // defined value before `assume_init` below.
+        let mut values = MaybeUninit::<ibv_values_ex>::zeroed();
+        // SAFETY: `values` is writable storage for one `ibv_values_ex`; only its `comp_mask`
+        // field is written through the raw pointer.
+        unsafe { (*values.as_mut_ptr()).comp_mask = mask.bits() };
+
+        // SAFETY: the context pointer is the handle held by this `DeviceContext` (assumed valid
+        // while `self` is alive) and `values` outlives the call.
+        let ret = unsafe { ibv_query_rt_values_ex(self.context.as_ptr(), values.as_mut_ptr()) };
+        match ret {
+            0 => Ok(RealTimeValues {
+                // SAFETY: the storage was zero-initialized above, so every (integer) field is
+                // initialized even if the driver left it untouched.
+                inner: unsafe { values.assume_init() },
+            }),
+            libc::EOPNOTSUPP => Err(QueryRealTimeValuesErrorKind::NotSupported.into()),
+            errno => Err(QueryRealTimeValuesErrorKind::Ibverbs(io::Error::from_raw_os_error(errno)).into()),
+        }
+    }
+
     /// # Safety
     ///
     /// Return the handle of device context.
@@ -725,6 +837,40 @@ mod tests {
     use super::*;
     use crate::ibverbs::device::{self, DeviceInfo};
 
+    /// Smoke test over every device present: the query must either yield a non-zero raw clock or
+    /// fail with `NotSupported`. With an empty device list the loop body never runs.
+    #[test]
+    fn test_query_rt_values_ex() -> Result<(), Box<dyn std::error::Error>> {
+        let device_list = device::DeviceList::new()?;
+        for device in &device_list {
+            let ctx = device.open().unwrap();
+            match ctx.query_rt_values_ex(ValuesMask::RawClock) {
+                Ok(values) => {
+                    // comp_mask must have RawClock set when the driver supports it
+                    assert!(values.comp_mask().contains(ValuesMask::RawClock));
+                    // A running device should have a non-zero clock
+                    let clock = values
+                        .raw_clock()
+                        .expect("RawClock bit set but raw_clock() returned None");
+                    assert!(
+                        clock.counter_hi > 0 || clock.counter_lo > 0,
+                        "raw clock counter should be non-zero"
+                    );
+                },
+                Err(e) => {
+                    // NotSupported is acceptable on some drivers / simulators. Report any other
+                    // error with `{:?}`: `Display` only prints the fixed top-level message, while
+                    // `Debug` also shows the wrapped kind.
+                    assert!(
+                        matches!(e.0, QueryRealTimeValuesErrorKind::NotSupported),
+                        "unexpected error: {e:?}"
+                    );
+                },
+            }
+        }
+        Ok(())
+    }
+
     #[test]
     fn test_mtu_conversion() {
         assert_eq!(Mtu::from(ibv_mtu::IBV_MTU_256), Mtu::Mtu256);