diff --git a/Cargo.lock b/Cargo.lock index 5949144ab52..6428a87df7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3279,7 +3279,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.7", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -8286,9 +8286,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0218ceea14babe24a4a5836f86ade86c1effbc198164e619194cb5069187e29" +checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" dependencies = [ "bytes", "chrono", @@ -8301,9 +8301,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed5a1ccce8ff962e31a165d41f6e2a2dd1245099dc4d594f5574a86cd90f4d3" +checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" dependencies = [ "proc-macro2", "quote", @@ -9216,9 +9216,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "steno" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a1e7ccea133c197729abfd16dccf91a3c4d0da1e94bb0c0aa164c2b8a227481" +version = "0.5.0-dev" dependencies = [ "anyhow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index d461e0585ad..726de828b9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -459,7 +459,7 @@ static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. It makes it # harder than expected to make breaking changes (even if you specify a specific # SHA). Cut a new Steno release instead. See omicron#2117. -steno = "0.4.0" +steno = "0.5.0-dev" strum = { version = "0.26", features = [ "derive" ] } subprocess = "0.2.9" supports-color = "3.0.0" @@ -678,8 +678,8 @@ opt-level = 3 # #[patch."https://github.com/oxidecomputer/dropshot"] #dropshot = { path = "../dropshot/dropshot" } -#[patch.crates-io] -#steno = { path = "../steno" } +[patch.crates-io] +steno = { path = "../steno-errors" } #[patch."https://github.com/oxidecomputer/propolis"] #propolis-client = { path = "../propolis/lib/propolis-client" } #propolis-mock-server = { path = "../propolis/bin/mock-server" } diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index ff0cc63d009..0fa021fc317 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -13,6 +13,7 @@ use super::{ use crate::app::sagas::declare_saga_actions; use crate::app::{authn, authz, db}; use crate::external_api::params; +use anyhow::Context; use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::DiskState; @@ -26,6 +27,7 @@ use std::convert::TryFrom; use std::net::SocketAddrV6; use steno::ActionError; use steno::Node; +use steno::UndoActionPermanentError; use uuid::Uuid; // disk create saga: input parameters @@ -218,7 +220,7 @@ async fn sdc_create_disk_record( async fn sdc_create_disk_record_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let disk_id = sagactx.lookup::("disk_id")?; @@ -228,7 +230,9 @@ async fn sdc_create_disk_record_undo( &disk_id, &[DiskState::Detached, DiskState::Faulted, DiskState::Creating], ) - .await?; + .await + .context("project_delete_disk_no_auth") + .map_err(UndoActionPermanentError::from)?; Ok(()) } @@ -273,7 +277,7 @@ async fn sdc_alloc_regions( async fn sdc_alloc_regions_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let log = osagactx.log(); @@ -285,7 +289,12 @@ async fn sdc_alloc_regions_undo( .map(|(_, region)| region.id()) .collect::>(); - osagactx.datastore().regions_hard_delete(log, region_ids).await?; + osagactx + .datastore() + .regions_hard_delete(log, region_ids) + .await + .context("regions_hard_delete") + .map_err(UndoActionPermanentError::from)?; Ok(()) } @@ -315,7 +324,7 @@ async fn sdc_account_space( async fn sdc_account_space_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -333,7 +342,7 @@ async fn sdc_account_space_undo( disk_created.size, ) .await - .map_err(ActionError::action_failed)?; + .context("virtual_provisioning_collection_delete_disk")?; Ok(()) } @@ -542,7 +551,7 @@ async fn sdc_regions_ensure( async fn sdc_regions_ensure_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let log = sagactx.user_data().log(); let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); @@ -562,7 +571,8 @@ async fn sdc_regions_ensure_undo( "datasets_and_regions", )?, ) - .await; + .await + .context("delete_crucible_regions"); match result { Err(e) => { @@ -578,7 +588,7 @@ async fn sdc_regions_ensure_undo( .disk_id(disk_id) .fetch_for(authz::Action::Modify) .await - .map_err(ActionError::action_failed)?; + .context("lookup disk")?; datastore .disk_update_runtime( @@ -586,7 +596,8 @@ async fn sdc_regions_ensure_undo( &authz_disk, &db_disk.runtime().faulted(), ) - .await?; + .await + .context("disk_update_runtime")?; return Err(e.into()); } @@ -620,7 +631,7 @@ async fn sdc_create_volume_record( async fn sdc_create_volume_record_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; @@ -630,9 +641,14 @@ async fn sdc_create_volume_record_undo( osagactx .datastore() .decrease_crucible_resource_count_and_soft_delete_volume(volume_id) - .await?; + .await + .context("decrease_crucible_resource_count_and_soft_delete_volume")?; - osagactx.datastore().volume_hard_delete(volume_id).await?; + osagactx + .datastore() + .volume_hard_delete(volume_id) + .await + .context("volume_hard_delete")?; Ok(()) } @@ -758,7 +774,7 @@ async fn sdc_call_pantry_attach_for_disk( async fn sdc_call_pantry_attach_for_disk_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let log = sagactx.user_data().log(); let disk_id = sagactx.lookup::("disk_id")?; diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index 24cf331a347..75ee66b232f 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -15,6 +15,7 @@ use serde::Deserialize; use serde::Serialize; use steno::ActionError; use steno::Node; +use steno::UndoActionPermanentError; use uuid::Uuid; // disk delete saga: input parameters @@ -117,7 +118,7 @@ async fn sdd_delete_disk_record( async fn sdd_delete_disk_record_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -156,7 +157,7 @@ async fn sdd_account_space( async fn sdd_account_space_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; diff --git a/nexus/src/app/sagas/finalize_disk.rs b/nexus/src/app/sagas/finalize_disk.rs index 89893fb703a..33f90157324 100644 --- a/nexus/src/app/sagas/finalize_disk.rs +++ b/nexus/src/app/sagas/finalize_disk.rs @@ -24,6 +24,7 @@ use serde::Serialize; use std::net::SocketAddrV6; use steno::ActionError; use steno::Node; +use steno::UndoActionPermanentError; use uuid::Uuid; #[derive(Debug, Deserialize, Serialize)] @@ -184,7 +185,7 @@ async fn sfd_set_finalizing_state( async fn sfd_set_finalizing_state_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index f336a01f0cc..f22a7be5e6d 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -10,6 +10,7 @@ use crate::app::{ MAX_NICS_PER_INSTANCE, }; use crate::external_api::params; +use anyhow::{anyhow, Context}; use nexus_db_model::{ExternalIp, NetworkInterfaceKind}; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::LookupPath; @@ -29,8 +30,8 @@ use slog::warn; use std::collections::HashSet; use std::convert::TryFrom; use std::fmt::Debug; -use steno::ActionError; use steno::Node; +use steno::{ActionError, UndoActionPermanentError}; use steno::{DagBuilder, SagaName}; use uuid::Uuid; @@ -350,7 +351,7 @@ async fn sic_associate_ssh_keys( async fn sic_associate_ssh_keys_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let datastore = osagactx.datastore(); let saga_params = sagactx.saga_params::()?; @@ -363,7 +364,7 @@ async fn sic_associate_ssh_keys_undo( datastore .instance_ssh_keys_delete(&opctx, instance_id) .await - .map_err(ActionError::action_failed)?; + .context("instance_ssh_keys_delete")?; Ok(()) } @@ -411,7 +412,7 @@ async fn sic_create_network_interface( /// Delete one network interface, by interface id. async fn sic_create_network_interface_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let repeat_saga_params = sagactx.saga_params::()?; let instance_id = repeat_saga_params.instance_id; let saga_params = repeat_saga_params.saga_params; @@ -426,7 +427,7 @@ async fn sic_create_network_interface_undo( .instance_id(instance_id) .lookup_for(authz::Action::Modify) .await - .map_err(ActionError::action_failed)?; + .context("instance lookup")?; let interface_deleted = match LookupPath::new(&opctx, &datastore) .instance_network_interface_id(interface_id) @@ -443,10 +444,13 @@ async fn sic_create_network_interface_undo( &authz_interface, ) .await - .map_err(|e| e.into_external())? + .map_err(|e| { + anyhow!(e.into_external()) + .context("instance_deletE_network_interface") + })? } Err(Error::ObjectNotFound { .. }) => false, - Err(e) => return Err(e.into()), + Err(e) => return Err(anyhow!(e).context("instance lookup").into()), }; if !interface_deleted { @@ -662,7 +666,7 @@ async fn sic_allocate_instance_snat_ip( /// Destroy an allocated SNAT IP address for the instance. async fn sic_allocate_instance_snat_ip_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let datastore = osagactx.datastore(); let saga_params = sagactx.saga_params::()?; @@ -671,7 +675,10 @@ async fn sic_allocate_instance_snat_ip_undo( &saga_params.serialized_authn, ); let ip_id = sagactx.lookup::("snat_ip_id")?; - datastore.deallocate_external_ip(&opctx, ip_id).await?; + datastore + .deallocate_external_ip(&opctx, ip_id) + .await + .context("deallocate_external_ip")?; Ok(()) } @@ -781,7 +788,7 @@ async fn sic_allocate_instance_external_ip( async fn sic_allocate_instance_external_ip_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let datastore = osagactx.datastore(); let repeat_saga_params = sagactx.saga_params::()?; @@ -808,13 +815,17 @@ async fn sic_allocate_instance_external_ip_undo( match ip_params { params::ExternalIpCreate::Ephemeral { .. } => { - datastore.deallocate_external_ip(&opctx, ip.id).await?; + datastore + .deallocate_external_ip(&opctx, ip.id) + .await + .context("deallocate_external_ip")?; } params::ExternalIpCreate::Floating { .. } => { let (.., authz_fip) = LookupPath::new(&opctx, &datastore) .floating_ip_id(ip.id) .lookup_for(authz::Action::Modify) - .await?; + .await + .context("lookup floating IP")?; datastore .floating_ip_begin_detach( @@ -823,7 +834,8 @@ async fn sic_allocate_instance_external_ip_undo( repeat_saga_params.instance_id, true, ) - .await?; + .await + .context("floating_ip_begin_detach")?; let n_rows = datastore .external_ip_complete_op( @@ -834,7 +846,7 @@ async fn sic_allocate_instance_external_ip_undo( nexus_db_model::IpAttachState::Detached, ) .await - .map_err(ActionError::action_failed)?; + .context("external_ip_complete_op")?; if n_rows != 1 { error!( @@ -857,8 +869,10 @@ async fn sic_attach_disk_to_instance( async fn sic_attach_disk_to_instance_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - Ok(ensure_instance_disk_attach_state(sagactx, false).await?) +) -> Result<(), UndoActionPermanentError> { + Ok(ensure_instance_disk_attach_state(sagactx, false) + .await + .context("ensure_instance_disk_attach_state")?) } async fn ensure_instance_disk_attach_state( @@ -954,7 +968,7 @@ async fn sic_create_instance_record( async fn sic_delete_instance_record( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let datastore = osagactx.datastore(); @@ -988,7 +1002,7 @@ async fn sic_delete_instance_record( Ok((.., authz_instance, db_instance)) => (authz_instance, db_instance), Err(err) => match err { Error::ObjectNotFound { .. } => return Ok(()), - _ => return Err(err.into()), + _ => return Err(anyhow!(err).context("instance lookup").into()), }, }; @@ -1004,8 +1018,10 @@ async fn sic_delete_instance_record( ..db_instance.runtime_state }; - let updated = - datastore.instance_update_runtime(&instance_id, &runtime_state).await?; + let updated = datastore + .instance_update_runtime(&instance_id, &runtime_state) + .await + .context("instance_update_runtime")?; if !updated { warn!( @@ -1015,7 +1031,10 @@ async fn sic_delete_instance_record( } // Actually delete the record. - datastore.project_delete_instance(&opctx, &authz_instance).await?; + datastore + .project_delete_instance(&opctx, &authz_instance) + .await + .context("project_delete_instance")?; Ok(()) } diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index f8edf37dc4e..238b269a994 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -10,12 +10,13 @@ use super::instance_common::{ use super::{ActionRegistry, NexusActionContext, NexusSaga}; use crate::app::sagas::declare_saga_actions; use crate::app::{authn, authz}; +use anyhow::Context; use nexus_db_model::{IpAttachState, Ipv4NatEntry}; use nexus_types::external_api::views; use omicron_common::api::external::Error; use serde::Deserialize; use serde::Serialize; -use steno::ActionError; +use steno::{ActionError, UndoActionPermanentError}; use uuid::Uuid; // The IP attach/detach sagas do some resource locking -- because we @@ -141,7 +142,7 @@ async fn siia_begin_attach_ip( async fn siia_begin_attach_ip_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let log = sagactx.user_data().log(); warn!(log, "siia_begin_attach_ip_undo: Reverting detached->attaching"); let params = sagactx.saga_params::()?; @@ -153,7 +154,8 @@ async fn siia_begin_attach_ip_undo( IpAttachState::Detached, &new_ip, ) - .await? + .await + .context("instance_ip_move_state")? { error!(log, "siia_begin_attach_ip_undo: external IP was deleted") } @@ -193,7 +195,7 @@ async fn siia_nat( async fn siia_nat_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -236,7 +238,7 @@ async fn siia_nat_undo( .nexus() .delete_dpd_config_by_entry(&opctx, &nat_entry) .await - .map_err(ActionError::action_failed) + .context("delete_dpd_config_by_entry") { error!(log, "siia_nat_undo: failed to notify DPD: {e}"); } @@ -256,7 +258,7 @@ async fn siia_update_opte( async fn siia_update_opte_undo( sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { +) -> Result<(), UndoActionPermanentError> { let log = sagactx.user_data().log(); let params = sagactx.saga_params::()?; let sled_id = sagactx.lookup::>("instance_state")?; @@ -268,6 +270,7 @@ async fn siia_update_opte_undo( target_ip, ) .await + .context("instance_ip_remove_opte") { error!(log, "siia_update_opte_undo: failed to notify sled-agent: {e}"); } diff --git a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs index 9625d77bf91..cd376871a03 100644 --- a/nexus/src/app/sagas/instance_ip_detach.rs +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -11,6 +11,7 @@ use super::{ActionRegistry, NexusActionContext, NexusSaga}; use crate::app::sagas::declare_saga_actions; use crate::app::{authn, authz, db}; use crate::external_api::params; +use anyhow::Context; use nexus_db_model::IpAttachState; use nexus_db_queries::db::lookup::LookupPath; use nexus_types::external_api::views; @@ -145,7 +146,8 @@ async fn siid_begin_detach_ip_undo( IpAttachState::Attached, &new_ip, ) - .await? + .await + .context("instance_ip_move_state")? { error!(log, "siid_begin_detach_ip_undo: external IP was deleted") } @@ -194,6 +196,7 @@ async fn siid_nat_undo( target_ip, ) .await + .context("instance_ip_add_nat") { error!(log, "siid_nat_undo: failed to notify DPD: {e}"); } @@ -230,6 +233,7 @@ async fn siid_update_opte_undo( target_ip, ) .await + .context("instance_ip_add_opte") { error!(log, "siid_update_opte_undo: failed to notify sled-agent: {e}"); } diff --git a/nexus/types/src/internal_api/views.rs b/nexus/types/src/internal_api/views.rs index fde2d070725..05d9051a65e 100644 --- a/nexus/types/src/internal_api/views.rs +++ b/nexus/types/src/internal_api/views.rs @@ -12,7 +12,7 @@ use serde::Serialize; use std::time::Duration; use std::time::Instant; use steno::SagaResultErr; -use steno::UndoActionError; +use steno::UndoActionPermanentError; use uuid::Uuid; pub async fn to_list(object_stream: ObjectStream) -> Vec @@ -118,15 +118,17 @@ impl From for SagaState { }, .. } => { - let UndoActionError::PermanentFailure { - source_error: undo_source_error, - } = undo_error; - SagaState::Stuck { - error_node_name, - error_info: SagaErrorInfo::from(error_source), - undo_error_node_name: undo_node_name, - undo_source_error, - } + // XXX-dap + todo!(); + // let UndoActionPermanentError::PermanentFailure { + // source_error: undo_source_error, + // } = undo_error; + // SagaState::Stuck { + // error_node_name, + // error_info: SagaErrorInfo::from(error_source), + // undo_error_node_name: undo_node_name, + // undo_source_error, + // } } steno::SagaStateView::Done { result: