diff --git a/Cargo.lock b/Cargo.lock index ab074128a..aafbe4117 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6891,6 +6891,7 @@ dependencies = [ "cfg-if", "cortex-m", "critical-section", + "hubpack", "num-derive 0.4.2", "num-traits", "paste", diff --git a/drv/transceivers-server/src/main.rs b/drv/transceivers-server/src/main.rs index 1a299be09..637c75645 100644 --- a/drv/transceivers-server/src/main.rs +++ b/drv/transceivers-server/src/main.rs @@ -49,7 +49,7 @@ task_slot!(THERMAL, thermal); include!(concat!(env!("OUT_DIR"), "/i2c_config.rs")); #[allow(dead_code)] -#[derive(Copy, Clone, PartialEq, Eq, Count)] +#[derive(Copy, Clone, PartialEq, Count)] enum Trace { #[count(skip)] None, @@ -79,6 +79,8 @@ enum Trace { DisableFailed(usize, LogicalPortMask), ClearDisabledPorts(LogicalPortMask), SeqError(SeqError), + ModuleTemperatureCritical(u8, Celsius), + ModuleTemperaturePowerDown(u8, Celsius), } counted_ringbuf!(Trace, 16, Trace::None); @@ -335,7 +337,7 @@ impl ServerImpl { model: ThermalProperties { target_temperature: Celsius(65.0), critical_temperature: Celsius(70.0), - power_down_temperature: Celsius(80.0), + power_down_temperature: Some(Celsius(80.0)), temperature_slew_deg_per_sec: 0.5, }, }) @@ -429,7 +431,18 @@ impl ServerImpl { // will return a `NotInAutoMode` error if the thermal loop is in // manual mode; this is harmless and will be ignored (instead of // cluttering up the logs). - match self.thermal_api.update_dynamic_input(i, m.model) { + + let model = ThermalProperties { + // We do *not* want the thermal loop to power down the whole + // system in response to a transceiver overheating. Instead, + // we will just disable the individual transceiver here. + // Thus, disable power-down on the version of the device's + // thermal properties by setting the + // `power_down_temperature` to `None`. 
+ power_down_temperature: None, + ..m.model + }; + match self.thermal_api.update_dynamic_input(i as u32, model) { Ok(()) | Err(ThermalError::NotInAutoMode) => (), Err(e) => ringbuf_entry!(Trace::ThermalError(i, e)), } @@ -453,6 +466,25 @@ impl ServerImpl { // We got a temperature! Send it over to the thermal task self.sensor_api .post_now(TRANSCEIVER_TEMPERATURE_SENSORS[i], t.0); + + if m.model.should_power_down(t) { + // If the module's temperature exceeds the power-down + // threshold, add it to the list of things to disable. + ringbuf_entry!(Trace::ModuleTemperaturePowerDown( + port.0, t + )); + // TODO(eliza): ereport + // TODO(eliza): debounce + to_disable.set(port); + } else if m.model.is_critical(t) { + ringbuf_entry!(Trace::ModuleTemperatureCritical( + port.0, t + )); + // TODO(eliza): ereport + // TODO(eliza): track over critical duration... + } + // TODO(eliza): see if it's nominal again and turn it back + // on...? } // We failed to read a temperature :( // diff --git a/idl/thermal.idol b/idl/thermal.idol index 5505b6bcf..dab15236e 100644 --- a/idl/thermal.idol +++ b/idl/thermal.idol @@ -80,13 +80,14 @@ Interface( "update_dynamic_input": ( doc: "Provides a thermal model for a dynamic sensor", args: { - "index": "usize", + "index": "u32", "model": "ThermalProperties", }, reply: Result( ok: "()", err: CLike("ThermalError"), ), + encoding: Hubpack ), "remove_dynamic_input": ( doc: "Removes the given dynamic input, so it is no longer used in the control loop", diff --git a/sys/userlib/Cargo.toml b/sys/userlib/Cargo.toml index f6d621645..1a0f2f69c 100644 --- a/sys/userlib/Cargo.toml +++ b/sys/userlib/Cargo.toml @@ -13,6 +13,7 @@ critical-section = ["dep:critical-section"] bstringify = { workspace = true } cfg-if = { workspace = true } critical-section = {workspace = true, optional = true, features = ["restore-state-none"]} +hubpack = { workspace = true } num-derive = { workspace = true } num-traits = { workspace = true } paste = { workspace = true } diff 
--git a/sys/userlib/src/units.rs b/sys/userlib/src/units.rs index 005250595..f92ac2dde 100644 --- a/sys/userlib/src/units.rs +++ b/sys/userlib/src/units.rs @@ -6,6 +6,8 @@ //! Tuple structs for units that are useful in the real world //! +use hubpack::SerializedSize; +use serde::{Deserialize, Serialize}; use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; /// Degrees Celsius @@ -19,6 +21,9 @@ use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; IntoBytes, Immutable, KnownLayout, + Serialize, + Deserialize, + SerializedSize, )] #[repr(C)] pub struct Celsius(pub f32); diff --git a/task/thermal-api/src/lib.rs b/task/thermal-api/src/lib.rs index 9c5cc75ef..806aa6aae 100644 --- a/task/thermal-api/src/lib.rs +++ b/task/thermal-api/src/lib.rs @@ -11,7 +11,7 @@ use drv_i2c_api::ResponseCode; use hubpack::SerializedSize; use serde::{Deserialize, Serialize}; use userlib::{units::Celsius, *}; -use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; +use zerocopy::{Immutable, IntoBytes, KnownLayout, TryFromBytes}; #[derive( Copy, Clone, Debug, FromPrimitive, Eq, PartialEq, IdolError, counters::Count, @@ -81,7 +81,7 @@ pub enum ThermalAutoState { } /// Properties for a particular part in the system -#[derive(Clone, Copy, IntoBytes, FromBytes, Immutable, KnownLayout)] +#[derive(Clone, Copy, Serialize, Deserialize, SerializedSize)] #[repr(C)] pub struct ThermalProperties { /// Target temperature for this part @@ -93,7 +93,10 @@ pub struct ThermalProperties { /// Temperature at which we drop into the A2 power state. This should be /// below the part's nonrecoverable temperature. - pub power_down_temperature: Celsius, + /// + /// If this is `None`, the system will not be sent to A2 due to this part's + /// temperature. 
+ pub power_down_temperature: Option<Celsius>, /// Maximum slew rate of temperature, measured in °C per second /// @@ -108,7 +111,11 @@ pub struct ThermalProperties { impl ThermalProperties { /// Returns whether this part is exceeding its power-down temperature pub fn should_power_down(&self, t: Celsius) -> bool { - t.0 >= self.power_down_temperature.0 + if let Some(power_down_temperature) = self.power_down_temperature { + t.0 >= power_down_temperature.0 + } else { + false + } } /// Returns whether this part is exceeding its critical temperature @@ -229,4 +236,11 @@ impl From for SensorReadError { } } +#[derive(Clone, Copy, IntoBytes, TryFromBytes, Immutable, KnownLayout)] +#[repr(u8)] +pub enum PowerDownMode { + System = 1, + Transceiver, +} + include!(concat!(env!("OUT_DIR"), "/client_stub.rs")); diff --git a/task/thermal/src/bsp/cosmo_ab.rs b/task/thermal/src/bsp/cosmo_ab.rs index 608c7c897..3bd57e02d 100644 --- a/task/thermal/src/bsp/cosmo_ab.rs +++ b/task/thermal/src/bsp/cosmo_ab.rs @@ -165,7 +165,7 @@ impl Bsp { const U2_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(65f32), critical_temperature: Celsius(70f32), - power_down_temperature: Celsius(75f32), + power_down_temperature: Some(Celsius(75f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -175,7 +175,7 @@ const U2_THERMALS: ThermalProperties = ThermalProperties { const M2_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(65f32), critical_temperature: Celsius(70f32), - power_down_temperature: Celsius(75f32), + power_down_temperature: Some(Celsius(75f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -186,7 +186,7 @@ const M2_THERMALS: ThermalProperties = ThermalProperties { const CPU_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(80f32), critical_temperature: Celsius(90f32), - power_down_temperature: Celsius(100f32), + power_down_temperature: Some(Celsius(100f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -194,7 +194,7 @@ const 
CPU_THERMALS: ThermalProperties = ThermalProperties { const T6_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(70f32), critical_temperature: Celsius(80f32), - power_down_temperature: Celsius(85f32), + power_down_temperature: Some(Celsius(85f32)), temperature_slew_deg_per_sec: 0.5, }; diff --git a/task/thermal/src/bsp/gimlet_bcdef.rs b/task/thermal/src/bsp/gimlet_bcdef.rs index d81e0c1e4..b77840eba 100644 --- a/task/thermal/src/bsp/gimlet_bcdef.rs +++ b/task/thermal/src/bsp/gimlet_bcdef.rs @@ -208,7 +208,7 @@ impl Bsp { const DIMM_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(80f32), critical_temperature: Celsius(90f32), - power_down_temperature: Celsius(95f32), + power_down_temperature: Some(Celsius(95f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -223,7 +223,7 @@ const DIMM_THERMALS: ThermalProperties = ThermalProperties { const U2_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(65f32), critical_temperature: Celsius(70f32), - power_down_temperature: Celsius(75f32), + power_down_temperature: Some(Celsius(75f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -233,7 +233,7 @@ const U2_THERMALS: ThermalProperties = ThermalProperties { const M2_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(65f32), critical_temperature: Celsius(70f32), - power_down_temperature: Celsius(75f32), + power_down_temperature: Some(Celsius(75f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -244,7 +244,7 @@ const M2_THERMALS: ThermalProperties = ThermalProperties { const CPU_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(80f32), critical_temperature: Celsius(90f32), - power_down_temperature: Celsius(100f32), + power_down_temperature: Some(Celsius(100f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -252,7 +252,7 @@ const CPU_THERMALS: ThermalProperties = ThermalProperties { const T6_THERMALS: ThermalProperties = ThermalProperties { 
target_temperature: Celsius(70f32), critical_temperature: Celsius(80f32), - power_down_temperature: Celsius(85f32), + power_down_temperature: Some(Celsius(85f32)), temperature_slew_deg_per_sec: 0.5, }; diff --git a/task/thermal/src/bsp/grapefruit.rs b/task/thermal/src/bsp/grapefruit.rs index bfdf13ec5..7afa71502 100644 --- a/task/thermal/src/bsp/grapefruit.rs +++ b/task/thermal/src/bsp/grapefruit.rs @@ -128,7 +128,7 @@ impl Bsp { const LM75_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(60f32), critical_temperature: Celsius(70f32), - power_down_temperature: Celsius(80f32), + power_down_temperature: Some(Celsius(80f32)), temperature_slew_deg_per_sec: 0.5, }; diff --git a/task/thermal/src/bsp/sidecar_bcd.rs b/task/thermal/src/bsp/sidecar_bcd.rs index 68c4b8688..c297a32c0 100644 --- a/task/thermal/src/bsp/sidecar_bcd.rs +++ b/task/thermal/src/bsp/sidecar_bcd.rs @@ -212,7 +212,7 @@ impl Bsp { const TF2_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(60f32), critical_temperature: Celsius(70f32), - power_down_temperature: Celsius(80f32), + power_down_temperature: Some(Celsius(80f32)), temperature_slew_deg_per_sec: 0.5, }; @@ -221,7 +221,7 @@ const TF2_THERMALS: ThermalProperties = ThermalProperties { const VSC7448_THERMALS: ThermalProperties = ThermalProperties { target_temperature: Celsius(85f32), critical_temperature: Celsius(95f32), - power_down_temperature: Celsius(105f32), + power_down_temperature: Some(Celsius(105f32)), temperature_slew_deg_per_sec: 0.5, }; diff --git a/task/thermal/src/main.rs b/task/thermal/src/main.rs index d92dd6380..56b07ed7c 100644 --- a/task/thermal/src/main.rs +++ b/task/thermal/src/main.rs @@ -269,14 +269,14 @@ impl<'a> idl::InOrderThermalImpl for ServerImpl<'a> { fn update_dynamic_input( &mut self, _: &RecvMessage, - index: usize, + index: u32, model: ThermalProperties, ) -> Result<(), RequestError<ThermalError>> { if self.mode != ThermalMode::Auto { return 
Err(ThermalError::NotInAutoMode.into()); } self.control - .update_dynamic_input(index, model) + .update_dynamic_input(index as usize, model) .map_err(RequestError::from) }