diff --git a/crates/api-core/src/api.rs b/crates/api-core/src/api.rs index dd5be74faf..d9b0c8ade9 100644 --- a/crates/api-core/src/api.rs +++ b/crates/api-core/src/api.rs @@ -2254,6 +2254,29 @@ impl Forge for Api { crate::handlers::machine_validation::get_machine_validation_runs(self, request).await } + async fn find_machine_validation_run_item_ids( + &self, + request: Request, + ) -> Result, Status> { + crate::handlers::machine_validation::find_machine_validation_run_item_ids(self, request) + .await + } + + async fn find_machine_validation_run_items_by_ids( + &self, + request: Request, + ) -> Result, Status> { + crate::handlers::machine_validation::find_machine_validation_run_items_by_ids(self, request) + .await + } + + async fn get_machine_validation_attempt( + &self, + request: Request, + ) -> Result, Status> { + crate::handlers::machine_validation::get_machine_validation_attempt(self, request).await + } + async fn admin_power_control( &self, request: Request, diff --git a/crates/api-core/src/auth/internal_rbac_rules.rs b/crates/api-core/src/auth/internal_rbac_rules.rs index 92c6527f83..68785f0d9d 100644 --- a/crates/api-core/src/auth/internal_rbac_rules.rs +++ b/crates/api-core/src/auth/internal_rbac_rules.rs @@ -483,6 +483,18 @@ impl InternalRBACRules { vec![ForgeAdminCLI, SiteAgent], ); x.perm("GetMachineValidationRuns", vec![ForgeAdminCLI, SiteAgent]); + x.perm( + "FindMachineValidationRunItemIds", + vec![ForgeAdminCLI, SiteAgent], + ); + x.perm( + "FindMachineValidationRunItemsByIds", + vec![ForgeAdminCLI, SiteAgent], + ); + x.perm( + "GetMachineValidationAttempt", + vec![ForgeAdminCLI, SiteAgent], + ); x.perm("AdminBmcReset", vec![ForgeAdminCLI]); x.perm("AdminPowerControl", vec![ForgeAdminCLI, Flow]); x.perm("DisableSecureBoot", vec![ForgeAdminCLI]); diff --git a/crates/api-core/src/handlers/machine_validation.rs b/crates/api-core/src/handlers/machine_validation.rs index eba8c0d744..b9e46eaf4d 100644 --- a/crates/api-core/src/handlers/machine_validation.rs +++ b/crates/api-core/src/handlers/machine_validation.rs @@ -18,6 +18,7 @@ use ::rpc::forge::{self as rpc, GetMachineValidationExternalConfigResponse}; use carbide_machine_controller::config::machine_validation::{ MachineValidationConfig, MachineValidationTestSelectionMode, }; +use carbide_uuid::machine_validation::{MachineValidationAttemptId, MachineValidationRunItemId}; use config_version::ConfigVersion; use db::{self, machine_validation_suites}; use model::machine::machine_search_config::MachineSearchConfig; @@ -27,6 +28,7 @@ use model::machine::{ }; use model::machine_validation::{ MachineValidation, MachineValidationResult, MachineValidationState, MachineValidationStatus, + MachineValidationTest as ModelMachineValidationTest, MachineValidationTestAddRequest as ModelTestAddRequest, MachineValidationTestUpdateRequest as ModelTestUpdateRequest, MachineValidationTestsGetRequest as ModelTestsGetRequest, @@ -259,6 +261,21 @@ pub(crate) async fn persist_validation_result( } } + // Keep the durable run-item/attempt write ahead of the legacy projections. + // A false return means this report is a replay of an already-terminal attempt. + let first_terminal_report = + db::machine_validation_execution::record_result(&mut txn, &validation_result).await?; + if !first_terminal_report { + tracing::info!( + validation_id = %validation_result.validation_id, + machine_id = %machine.id, + test_id = ?validation_result.test_id, + "machine validation result ignored because attempt was already terminal" + ); + txn.commit().await?; + return Ok(tonic::Response::new(())); + } + // Update the Machine validation health report based on the result let mut updated_validation_health_report = machine.machine_validation_health_report(); updated_validation_health_report.observed_at = Some(chrono::Utc::now()); @@ -433,6 +450,100 @@ pub(crate) async fn get_machine_validation_runs( Ok(ret) } +pub(crate) async fn find_machine_validation_run_item_ids( + api: &Api, + request: tonic::Request, +) -> Result, Status> { + log_request_data(&request); + let req = request.into_inner(); + let validation_id = req + .validation_id + .as_ref() + .ok_or(CarbideError::MissingArgument("validation id"))?; + + let mut db_reader = api.db_reader(); + let run_item_ids = db::machine_validation_execution::find_run_item_ids_by_run_id( + &mut db_reader, + validation_id, + ) + .await? + .into_iter() + .map(|id| ::rpc::common::Uuid { + value: id.to_string(), + }) + .collect(); + + Ok(tonic::Response::new(rpc::MachineValidationRunItemIdList { + run_item_ids, + })) +} + +pub(crate) async fn find_machine_validation_run_items_by_ids( + api: &Api, + request: tonic::Request, +) -> Result, Status> { + log_request_data(&request); + let req = request.into_inner(); + + let max_find_by_ids = api.runtime_config.max_find_by_ids as usize; + if req.run_item_ids.len() > max_find_by_ids { + return Err(CarbideError::InvalidArgument(format!( + "no more than {max_find_by_ids} run_item_ids can be accepted" + )) + .into()); + } else if req.run_item_ids.is_empty() { + return Err(CarbideError::InvalidArgument( + "at least one run_item_id must be provided".to_string(), + ) + .into()); + } + + let run_item_ids = req + .run_item_ids + .iter() + .map(|id| { + uuid::Uuid::try_from(id) + .map(MachineValidationRunItemId::from) + .map_err(CarbideError::from) + }) + .collect::, _>>()?; + + let mut db_reader = api.db_reader(); + let run_items = + db::machine_validation_execution::find_run_items_by_ids(&mut db_reader, &run_item_ids) + .await? + .into_iter() + .map(rpc::MachineValidationRunItem::from) + .collect(); + + Ok(tonic::Response::new(rpc::MachineValidationRunItemList { + run_items, + })) +} + +pub(crate) async fn get_machine_validation_attempt( + api: &Api, + request: tonic::Request, +) -> Result, Status> { + log_request_data(&request); + let req = request.into_inner(); + let attempt_id = req + .attempt_id + .as_ref() + .ok_or(CarbideError::MissingArgument("attempt id"))?; + let attempt_id = MachineValidationAttemptId::from( + uuid::Uuid::try_from(attempt_id).map_err(CarbideError::from)?, + ); + + let attempt = + db::machine_validation_execution::find_attempt_by_id(&api.database_connection, &attempt_id) + .await?; + + Ok(tonic::Response::new(rpc::MachineValidationAttempt::from( + attempt, + ))) +} + pub(crate) async fn on_demand_machine_validation( api: &Api, request: tonic::Request, @@ -761,19 +872,46 @@ pub(crate) async fn update_machine_validation_run( let validation_id = req .validation_id - .as_ref() .ok_or(CarbideError::MissingArgument("Validation id"))?; + let selected_tests = req + .selected_tests + .into_iter() + .map(ModelMachineValidationTest::try_from) + .collect::, _>>()?; + let total = req + .total + .try_into() + .map_err(|_e| CarbideError::InvalidArgument("total".to_string()))?; + let total_len = + usize::try_from(total).map_err(|_e| CarbideError::InvalidArgument("total".to_string()))?; + + if !selected_tests.is_empty() && total_len != selected_tests.len() { + return Err(CarbideError::InvalidArgument( + "total must match selected_tests length".to_string(), + ) + .into()); + } db::machine_validation::update_run( &mut txn, - validation_id, - req.total - .try_into() - .map_err(|_e| CarbideError::InvalidArgument("total".to_string()))?, + &validation_id, + total, req.duration_to_complete.unwrap_or_default().seconds, ) .await?; + if !selected_tests.is_empty() { + let machine_validation = + db::machine_validation::find_by_id(&mut txn, &validation_id).await?; + db::machine_validation_execution::materialize_run_plan( + &mut txn, + &validation_id, + machine_validation.context.as_deref().unwrap_or_default(), + &selected_tests, + ) + .await?; + } + txn.commit().await?; Ok(tonic::Response::new(rpc::MachineValidationRunResponse { diff --git a/crates/api-core/src/machine_validation/mod.rs b/crates/api-core/src/machine_validation/mod.rs index 7e7b4aae5e..0759dab8a6 100644 --- a/crates/api-core/src/machine_validation/mod.rs +++ b/crates/api-core/src/machine_validation/mod.rs @@ -202,6 +202,8 @@ async fn reconcile_stale_validation( stale_run_timeout: std::time::Duration, now: chrono::DateTime, ) -> CarbideResult { + // Returns true only when this call actually transitions an active stale run. + // False means another path already completed or reconciled the run. let error_message = format!( "Machine validation run {} exceeded its expected duration plus stale timeout", validation.id diff --git a/crates/api-core/src/tests/common/api_fixtures/mod.rs b/crates/api-core/src/tests/common/api_fixtures/mod.rs index 0beb06c0bb..34ce4ea238 100644 --- a/crates/api-core/src/tests/common/api_fixtures/mod.rs +++ b/crates/api-core/src/tests/common/api_fixtures/mod.rs @@ -2736,6 +2736,7 @@ pub async fn update_machine_validation_run( validation_id, duration_to_complete, total, + selected_tests: Vec::new(), })) .await .unwrap() diff --git a/crates/api-core/src/tests/machine_validation.rs b/crates/api-core/src/tests/machine_validation.rs index f0e5411bc3..8cfbc3a3c9 100644 --- a/crates/api-core/src/tests/machine_validation.rs +++ b/crates/api-core/src/tests/machine_validation.rs @@ -22,7 +22,7 @@ use carbide_machine_controller::config::machine_validation::{ MachineValidationConfig, MachineValidationTestConfig, MachineValidationTestSelectionMode, }; use carbide_machine_controller::handler::MachineStateHandlerBuilder; -use carbide_uuid::machine_validation::MachineValidationId; +use carbide_uuid::machine_validation::{MachineValidationId, MachineValidationRunItemId}; use common::api_fixtures::{ TestEnvOverrides, create_host_with_machine_validation, create_test_env, create_test_env_with_overrides, get_config, get_machine_validation_results, @@ -1463,6 +1463,236 @@ async fn test_on_demant_machine_validation_all_contexts( Ok(()) } +#[crate::sqlx_test(fixtures("create_machine_validation_tests",))] +async fn test_machine_validation_m1_persists_selected_test_and_idempotent_result( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env(pool).await; + + let initial_result = rpc::forge::MachineValidationResult { + validation_id: None, + name: "test1".to_string(), + description: "desc".to_string(), + command: "echo".to_string(), + args: "test".to_string(), + std_out: "".to_string(), + std_err: "".to_string(), + context: "Discovery".to_string(), + exit_code: 0, + start_time: Some(Timestamp::from(SystemTime::now())), + end_time: Some(Timestamp::from(SystemTime::now())), + test_id: Some("test1".to_string()), + }; + let mh = create_host_with_machine_validation(&env, Some(initial_result), None).await; + let machine = mh.host().rpc_machine().await; + + let selected_test = env + .api + .get_machine_validation_tests(tonic::Request::new( + rpc::forge::MachineValidationTestsGetRequest { + test_id: Some("forge_dcgm_long_test".to_string()), + ..rpc::forge::MachineValidationTestsGetRequest::default() + }, + )) + .await? + .into_inner() + .tests + .into_iter() + .next() + .expect("machine validation fixture should include forge_dcgm_long_test"); + + let on_demand_response = on_demand_machine_validation( + &env, + machine.id.unwrap_or_default(), + Vec::new(), + vec![selected_test.test_id.clone()], + true, + vec!["OnDemand".to_string()], + ) + .await; + let validation_id = on_demand_response.validation_id.unwrap(); + + let mismatch = env + .api + .update_machine_validation_run(tonic::Request::new( + rpc::forge::MachineValidationRunRequest { + validation_id: Some(validation_id), + duration_to_complete: Some(rpc::Duration::from(std::time::Duration::from_secs( + selected_test.timeout.unwrap_or(7200).try_into().unwrap(), + ))), + total: 2, + selected_tests: vec![selected_test.clone()], + }, + )) + .await; + let Err(status) = mismatch else { + panic!("update_machine_validation_run should reject mismatched total"); + }; + assert_eq!(status.code(), tonic::Code::InvalidArgument); + assert!(status.message().contains("selected_tests")); + + env.api + .update_machine_validation_run(tonic::Request::new( + rpc::forge::MachineValidationRunRequest { + validation_id: Some(validation_id), + duration_to_complete: Some(rpc::Duration::from(std::time::Duration::from_secs( + selected_test.timeout.unwrap_or(7200).try_into().unwrap(), + ))), + total: 1, + selected_tests: vec![selected_test.clone()], + }, + )) + .await?; + + let run_item_ids = env + .api + .find_machine_validation_run_item_ids(tonic::Request::new( + rpc::forge::MachineValidationRunItemSearchFilter { + validation_id: Some(validation_id), + }, + )) + .await? + .into_inner() + .run_item_ids; + assert_eq!(run_item_ids.len(), 1); + + let run_items = env + .api + .find_machine_validation_run_items_by_ids(tonic::Request::new( + rpc::forge::MachineValidationRunItemsByIdsRequest { run_item_ids }, + )) + .await? + .into_inner() + .run_items; + assert_eq!(run_items.len(), 1); + assert_eq!(run_items[0].test_id, selected_test.test_id); + assert_eq!(run_items[0].state, "Pending"); + assert!(run_items[0].current_attempt_id.is_some()); + + let run_item_id = MachineValidationRunItemId::from(uuid::Uuid::try_from( + run_items[0].run_item_id.as_ref().unwrap(), + )?); + let pending_attempts = + db::machine_validation_execution::find_attempts_by_run_item_id(&env.pool, &run_item_id) + .await?; + assert_eq!(pending_attempts.len(), 1); + assert_eq!(pending_attempts[0].state.to_string(), "Pending"); + + env.run_machine_state_controller_iteration_until_state_matches( + &mh.host().id, + 1, + ManagedHostState::Validation { + validation_state: ValidationState::MachineValidation { + machine_validation: MachineValidatingState::RebootHost { validation_id }, + }, + }, + ) + .await; + let _ = mh.host().reboot_completed().await; + env.run_machine_state_controller_iteration_until_state_condition(&mh.host().id, 1, |machine| { + match machine.current_state() { + ManagedHostState::Validation { + validation_state: + ValidationState::MachineValidation { + machine_validation: MachineValidatingState::MachineValidating { id, .. }, + }, + } => *id == validation_id, + _ => false, + } + }) + .await; + + let terminal_result = rpc::forge::MachineValidationResult { + validation_id: Some(validation_id), + name: selected_test.name.clone(), + description: selected_test.description.clone().unwrap_or_default(), + command: selected_test.command.clone(), + args: selected_test.args.clone(), + std_out: "ok".to_string(), + std_err: String::new(), + context: "OnDemand".to_string(), + exit_code: 0, + start_time: Some(Timestamp::from(SystemTime::now())), + end_time: Some(Timestamp::from(SystemTime::now())), + test_id: Some(selected_test.test_id.clone()), + }; + env.api + .persist_validation_result(tonic::Request::new( + rpc::forge::MachineValidationResultPostRequest { + result: Some(terminal_result.clone()), + }, + )) + .await?; + + let replayed_result = rpc::forge::MachineValidationResult { + name: "changed replay name".to_string(), + std_out: "changed replay stdout".to_string(), + context: "Replay".to_string(), + ..terminal_result.clone() + }; + env.api + .persist_validation_result(tonic::Request::new( + rpc::forge::MachineValidationResultPostRequest { + result: Some(replayed_result), + }, + )) + .await?; + + let legacy_results = + db::machine_validation_result::find_by_validation_id(&env.pool, &validation_id).await?; + assert_eq!( + legacy_results + .iter() + .filter(|result| result.test_id == Some(selected_test.test_id.clone())) + .count(), + 1 + ); + + let terminal_attempts = + db::machine_validation_execution::find_attempts_by_run_item_id(&env.pool, &run_item_id) + .await?; + assert_eq!(terminal_attempts.len(), 1); + assert_eq!(terminal_attempts[0].state.to_string(), "Success"); + assert_eq!(terminal_attempts[0].exit_code, Some(0)); + assert_eq!(terminal_attempts[0].stdout_summary, Some("ok".to_string())); + + let attempt = env + .api + .get_machine_validation_attempt(tonic::Request::new( + rpc::forge::MachineValidationAttemptGetRequest { + attempt_id: run_items[0].current_attempt_id.clone(), + }, + )) + .await? + .into_inner(); + assert_eq!(attempt.state, "Success"); + + let terminal_run_items = env + .api + .find_machine_validation_run_items_by_ids(tonic::Request::new( + rpc::forge::MachineValidationRunItemsByIdsRequest { + run_item_ids: vec![run_items[0].run_item_id.clone().unwrap()], + }, + )) + .await? + .into_inner() + .run_items; + assert_eq!(terminal_run_items[0].state, "Success"); + assert_eq!(terminal_run_items[0].attempt, 1); + assert_eq!(terminal_run_items[0].display_name, selected_test.name); + assert_eq!(terminal_run_items[0].context, "OnDemand"); + + let runs = get_machine_validation_runs(&env, &mh.host().id, true).await; + let run = runs + .runs + .into_iter() + .find(|run| run.validation_id == Some(validation_id)) + .expect("on-demand validation run should be listed"); + assert_eq!(run.status.unwrap().completed_tests, 1); + + Ok(()) +} + #[crate::sqlx_test] async fn test_machine_validation_manager_reconciles_stale_run( pool: sqlx::PgPool, @@ -1584,6 +1814,7 @@ async fn test_machine_validation_manager_reconciles_stale_run( validation_id: Some(validation_id), duration_to_complete: Some(rpc::Duration::from(std::time::Duration::from_secs(1))), total: 1, + selected_tests: Vec::new(), }, )) .await; diff --git a/crates/api-db/migrations/20260613120000_machine_validation_execution_foundation.sql b/crates/api-db/migrations/20260613120000_machine_validation_execution_foundation.sql new file mode 100644 index 0000000000..892a8c800f --- /dev/null +++ b/crates/api-db/migrations/20260613120000_machine_validation_execution_foundation.sql @@ -0,0 +1,69 @@ +CREATE TABLE machine_validation_run_items ( + id uuid NOT NULL, + run_id uuid NOT NULL, + test_id TEXT NOT NULL, + test_version TEXT, + display_name TEXT NOT NULL, + context TEXT NOT NULL, + component TEXT, + state TEXT NOT NULL DEFAULT 'Pending', + order_index INTEGER NOT NULL, + attempt INTEGER NOT NULL DEFAULT 0, + max_attempts INTEGER NOT NULL DEFAULT 1, + timeout_seconds BIGINT NOT NULL DEFAULT 7200, + started_at TIMESTAMPTZ, + ended_at TIMESTAMPTZ, + last_heartbeat_at TIMESTAMPTZ, + skip_reason TEXT, + failure_reason TEXT, + PRIMARY KEY (id), + CONSTRAINT machine_validation_run_items_run_id_fk + FOREIGN KEY (run_id) REFERENCES machine_validation(id) ON DELETE CASCADE, + CONSTRAINT machine_validation_run_items_state_check + CHECK (state IN ('Pending', 'Running', 'Success', 'Skipped', 'Failed')), + CONSTRAINT machine_validation_run_items_attempt_check + CHECK (attempt >= 0), + CONSTRAINT machine_validation_run_items_max_attempts_check + CHECK (max_attempts > 0), + CONSTRAINT machine_validation_run_items_order_check + CHECK (order_index >= 0), + CONSTRAINT machine_validation_run_items_timeout_check + CHECK (timeout_seconds >= 0) +); + +CREATE UNIQUE INDEX machine_validation_run_items_run_test_idx + ON machine_validation_run_items (run_id, test_id); + +CREATE INDEX machine_validation_run_items_run_order_idx + ON machine_validation_run_items (run_id, order_index); + +CREATE TABLE machine_validation_attempts ( + id uuid NOT NULL, + run_item_id uuid NOT NULL, + attempt_number INTEGER NOT NULL, + state TEXT NOT NULL DEFAULT 'Pending', + command TEXT, + args TEXT, + container_image TEXT, + execute_in_host BOOLEAN, + exit_code INTEGER, + failure_classification TEXT, + started_at TIMESTAMPTZ, + ended_at TIMESTAMPTZ, + last_heartbeat_at TIMESTAMPTZ, + stdout_summary TEXT, + stderr_summary TEXT, + PRIMARY KEY (id), + CONSTRAINT machine_validation_attempts_run_item_id_fk + FOREIGN KEY (run_item_id) REFERENCES machine_validation_run_items(id) ON DELETE CASCADE, + CONSTRAINT machine_validation_attempts_state_check + CHECK (state IN ('Pending', 'Running', 'Success', 'Skipped', 'Failed')), + CONSTRAINT machine_validation_attempts_attempt_number_check + CHECK (attempt_number > 0) +); + +CREATE UNIQUE INDEX machine_validation_attempts_item_attempt_idx + ON machine_validation_attempts (run_item_id, attempt_number); + +CREATE INDEX machine_validation_attempts_run_item_idx + ON machine_validation_attempts (run_item_id); diff --git a/crates/api-db/src/lib.rs b/crates/api-db/src/lib.rs index cec9f0e0fe..4a2ae6d658 100644 --- a/crates/api-db/src/lib.rs +++ b/crates/api-db/src/lib.rs @@ -57,6 +57,7 @@ pub mod machine_interface_address; pub mod machine_topology; pub mod machine_validation; pub mod machine_validation_config; +pub mod machine_validation_execution; pub mod machine_validation_result; pub mod machine_validation_suites; pub mod managed_host; diff --git a/crates/api-db/src/machine_validation_execution.rs b/crates/api-db/src/machine_validation_execution.rs new file mode 100644 index 0000000000..f6ecec4c5f --- /dev/null +++ b/crates/api-db/src/machine_validation_execution.rs @@ -0,0 +1,536 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use carbide_uuid::machine_validation::{ + MachineValidationAttemptId, MachineValidationId, MachineValidationRunItemId, +}; +use model::machine_validation::{ + MachineValidationAttempt, MachineValidationAttemptState, MachineValidationResult, + MachineValidationRunItem, MachineValidationRunItemState, MachineValidationTest, +}; +use sqlx::PgConnection; + +use crate::db_read::DbReader; +use crate::{DatabaseError, DatabaseResult, machine_validation_suites}; + +const DEFAULT_TIMEOUT_SECONDS: i64 = 7200; +// M1 persists Scout's existing sequential result stream as a single attempt per test. +// Retry-aware events will need to carry attempt identity before this can vary. +const INITIAL_ATTEMPT_NUMBER: i32 = 1; +const SUMMARY_LIMIT: usize = 4096; + +pub async fn materialize_run_plan( + txn: &mut PgConnection, + run_id: &MachineValidationId, + context: &str, + selected_tests: &[MachineValidationTest], +) -> DatabaseResult<()> { + for (order_index, test) in selected_tests.iter().enumerate() { + let order_index = i32::try_from(order_index).map_err(|_| { + DatabaseError::InvalidArgument( + "machine validation run has too many selected tests".to_string(), + ) + })?; + let run_item_id = + upsert_run_item_from_test(txn, run_id, context, test, order_index).await?; + upsert_pending_attempt(txn, &run_item_id, test).await?; + } + + Ok(()) +} + +pub async fn find_run_items_by_run_id( + txn: impl DbReader<'_>, + run_id: &MachineValidationId, +) -> DatabaseResult> { + const QUERY: &str = " + SELECT + run_item.*, + current_attempt.id AS current_attempt_id + FROM machine_validation_run_items run_item + LEFT JOIN LATERAL ( + SELECT id + FROM machine_validation_attempts attempt + WHERE attempt.run_item_id=run_item.id + ORDER BY attempt.attempt_number DESC + LIMIT 1 + ) current_attempt ON true + WHERE run_item.run_id=$1 + ORDER BY run_item.order_index, run_item.display_name"; + + sqlx::query_as::<_, MachineValidationRunItem>(QUERY) + .bind(run_id) + .fetch_all(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e)) +} + +pub async fn find_run_item_ids_by_run_id( + txn: impl DbReader<'_>, + run_id: &MachineValidationId, +) -> DatabaseResult> { + const QUERY: &str = " + SELECT id + FROM machine_validation_run_items + WHERE run_id=$1 + ORDER BY order_index, display_name"; + + sqlx::query_scalar::<_, MachineValidationRunItemId>(QUERY) + .bind(run_id) + .fetch_all(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e)) +} + +pub async fn find_run_items_by_ids( + txn: impl DbReader<'_>, + ids: &[MachineValidationRunItemId], +) -> DatabaseResult> { + if ids.is_empty() { + return Ok(Vec::new()); + } + + const QUERY: &str = " + SELECT + run_item.*, + current_attempt.id AS current_attempt_id + FROM machine_validation_run_items run_item + LEFT JOIN LATERAL ( + SELECT id + FROM machine_validation_attempts attempt + WHERE attempt.run_item_id=run_item.id + ORDER BY attempt.attempt_number DESC + LIMIT 1 + ) current_attempt ON true + WHERE run_item.id=ANY($1) + ORDER BY run_item.order_index, run_item.display_name"; + + sqlx::query_as::<_, MachineValidationRunItem>(QUERY) + .bind(ids) + .fetch_all(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e)) +} + +pub async fn find_attempt_by_id( + txn: impl DbReader<'_>, + id: &MachineValidationAttemptId, +) -> DatabaseResult { + const QUERY: &str = "SELECT * FROM machine_validation_attempts WHERE id=$1"; + + sqlx::query_as::<_, MachineValidationAttempt>(QUERY) + .bind(id) + .fetch_optional(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e))? + .ok_or_else(|| DatabaseError::NotFoundError { + kind: "machine_validation_attempt", + id: id.to_string(), + }) +} + +pub async fn find_attempts_by_run_item_id( + txn: impl DbReader<'_>, + run_item_id: &MachineValidationRunItemId, +) -> DatabaseResult> { + const QUERY: &str = " + SELECT * FROM machine_validation_attempts + WHERE run_item_id=$1 + ORDER BY attempt_number"; + + sqlx::query_as::<_, MachineValidationAttempt>(QUERY) + .bind(run_item_id) + .fetch_all(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e)) +} + +pub async fn record_result( + txn: &mut PgConnection, + result: &MachineValidationResult, +) -> DatabaseResult { + let run_item_id = upsert_run_item_from_result(txn, result).await?; + let state = state_from_result(result); + let stdout_summary = truncate_summary(&result.stdout); + let stderr_summary = truncate_summary(&result.stderr); + let failure_classification = + (state == MachineValidationAttemptState::Failed).then(|| "CommandFailed".to_string()); + + let updated_first_terminal = update_pending_attempt_from_result( + txn, + &run_item_id, + result, + &state, + stdout_summary.as_deref(), + stderr_summary.as_deref(), + failure_classification.as_deref(), + ) + .await?; + + let first_terminal = if updated_first_terminal { + true + } else { + insert_terminal_attempt_from_result( + txn, + &run_item_id, + result, + &state, + stdout_summary.as_deref(), + stderr_summary.as_deref(), + failure_classification.as_deref(), + ) + .await? + }; + + if first_terminal { + update_run_item_from_result( + txn, + &run_item_id, + result, + &state, + stdout_summary.as_deref(), + stderr_summary.as_deref(), + ) + .await?; + } + + Ok(first_terminal) +} + +async fn upsert_run_item_from_test( + txn: &mut PgConnection, + run_id: &MachineValidationId, + context: &str, + test: &MachineValidationTest, + order_index: i32, +) -> DatabaseResult { + const QUERY: &str = " + WITH upserted AS ( + INSERT INTO machine_validation_run_items ( + id, + run_id, + test_id, + test_version, + display_name, + context, + component, + state, + order_index, + attempt, + max_attempts, + timeout_seconds + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, 0, 1, $10) + ON CONFLICT (run_id, test_id) DO UPDATE + SET + test_version=EXCLUDED.test_version, + display_name=EXCLUDED.display_name, + context=EXCLUDED.context, + component=EXCLUDED.component, + order_index=EXCLUDED.order_index, + max_attempts=EXCLUDED.max_attempts, + timeout_seconds=EXCLUDED.timeout_seconds + WHERE machine_validation_run_items.state IN ('Pending', 'Running') + RETURNING id + ) + SELECT id FROM upserted + UNION ALL + SELECT id + FROM machine_validation_run_items + WHERE run_id=$2 AND test_id=$3 + LIMIT 1"; + + let id = MachineValidationRunItemId::new(); + sqlx::query_scalar::<_, MachineValidationRunItemId>(QUERY) + .bind(id) + .bind(run_id) + .bind(&test.test_id) + .bind(test.version.version_string()) + .bind(&test.name) + .bind(context) + .bind(test.components.first()) + .bind(MachineValidationRunItemState::Pending.to_string()) + .bind(order_index) + .bind(test.timeout.unwrap_or(DEFAULT_TIMEOUT_SECONDS)) + .fetch_one(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e)) +} + +async fn upsert_pending_attempt( + txn: &mut PgConnection, + run_item_id: &MachineValidationRunItemId, + test: &MachineValidationTest, +) -> DatabaseResult<()> { + const QUERY: &str = " + INSERT INTO machine_validation_attempts ( + id, + run_item_id, + attempt_number, + state, + command, + args, + container_image, + execute_in_host + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + ON CONFLICT (run_item_id, attempt_number) DO UPDATE + SET + command=EXCLUDED.command, + args=EXCLUDED.args, + container_image=EXCLUDED.container_image, + execute_in_host=EXCLUDED.execute_in_host + WHERE machine_validation_attempts.state IN ('Pending', 'Running')"; + + sqlx::query(QUERY) + .bind(MachineValidationAttemptId::new()) + .bind(run_item_id) + .bind(INITIAL_ATTEMPT_NUMBER) + .bind(MachineValidationAttemptState::Pending.to_string()) + .bind(&test.command) + .bind(&test.args) + .bind(test.img_name.as_ref()) + .bind(test.execute_in_host) + .execute(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e))?; + Ok(()) +} + +async fn upsert_run_item_from_result( + txn: &mut PgConnection, + result: &MachineValidationResult, +) -> DatabaseResult { + const QUERY: &str = " + WITH upserted AS ( + INSERT INTO machine_validation_run_items ( + id, + run_id, + test_id, + display_name, + context, + state, + order_index, + attempt, + max_attempts, + timeout_seconds + ) + VALUES ( + $1, + $2, + $3, + $4, + $5, + $6, + COALESCE((SELECT MAX(order_index) + 1 FROM machine_validation_run_items WHERE run_id=$2), 0), + 0, + 1, + $7 + ) + ON CONFLICT (run_id, test_id) DO UPDATE + SET + display_name=EXCLUDED.display_name, + context=EXCLUDED.context + WHERE machine_validation_run_items.state IN ('Pending', 'Running') + RETURNING id + ) + SELECT id FROM upserted + UNION ALL + SELECT id + FROM machine_validation_run_items + WHERE run_id=$2 AND test_id=$3 + LIMIT 1"; + + sqlx::query_scalar::<_, MachineValidationRunItemId>(QUERY) + .bind(MachineValidationRunItemId::new()) + .bind(result.validation_id) + .bind(result_test_id(result)) + .bind(&result.name) + .bind(&result.context) + .bind(MachineValidationRunItemState::Pending.to_string()) + .bind(DEFAULT_TIMEOUT_SECONDS) + .fetch_one(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e)) +} + +async fn update_pending_attempt_from_result( + txn: &mut PgConnection, + run_item_id: &MachineValidationRunItemId, + result: &MachineValidationResult, + state: &MachineValidationAttemptState, + stdout_summary: Option<&str>, + stderr_summary: Option<&str>, + failure_classification: Option<&str>, +) -> DatabaseResult { + const QUERY: &str = " + UPDATE machine_validation_attempts + SET + state=$3, + command=$4, + args=$5, + exit_code=$6, + failure_classification=$7, + started_at=$8, + ended_at=$9, + last_heartbeat_at=$9, + stdout_summary=$10, + stderr_summary=$11 + WHERE run_item_id=$1 + AND attempt_number=$2 + AND state IN ('Pending', 'Running') + RETURNING id"; + + let updated = sqlx::query_scalar::<_, MachineValidationAttemptId>(QUERY) + .bind(run_item_id) + .bind(INITIAL_ATTEMPT_NUMBER) + .bind(state.to_string()) + .bind(&result.command) + .bind(&result.args) + .bind(result.exit_code) + .bind(failure_classification) + .bind(result.start_time) + .bind(result.end_time) + .bind(stdout_summary) + .bind(stderr_summary) + .fetch_optional(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e))?; + Ok(updated.is_some()) +} + +async fn insert_terminal_attempt_from_result( + txn: &mut PgConnection, + run_item_id: &MachineValidationRunItemId, + result: &MachineValidationResult, + state: &MachineValidationAttemptState, + stdout_summary: Option<&str>, + stderr_summary: Option<&str>, + failure_classification: Option<&str>, +) -> DatabaseResult { + const QUERY: &str = " + INSERT INTO machine_validation_attempts ( + id, + run_item_id, + attempt_number, + state, + command, + args, + exit_code, + failure_classification, + started_at, + ended_at, + last_heartbeat_at, + stdout_summary, + stderr_summary + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $10, $11, $12) + ON CONFLICT (run_item_id, attempt_number) DO NOTHING + RETURNING id"; + + let inserted = sqlx::query_scalar::<_, MachineValidationAttemptId>(QUERY) + .bind(MachineValidationAttemptId::new()) + .bind(run_item_id) + .bind(INITIAL_ATTEMPT_NUMBER) + .bind(state.to_string()) + .bind(&result.command) + .bind(&result.args) + .bind(result.exit_code) + .bind(failure_classification) + .bind(result.start_time) + .bind(result.end_time) + .bind(stdout_summary) + .bind(stderr_summary) + .fetch_optional(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e))?; + Ok(inserted.is_some()) +} + +async fn update_run_item_from_result( + txn: &mut PgConnection, + run_item_id: &MachineValidationRunItemId, + result: &MachineValidationResult, + state: &MachineValidationAttemptState, + stdout_summary: Option<&str>, + stderr_summary: Option<&str>, +) -> DatabaseResult<()> { + const QUERY: &str = " + UPDATE machine_validation_run_items + SET + state=$2, + attempt=$3, + started_at=$4, + ended_at=$5, + last_heartbeat_at=$5, + skip_reason=$6, + failure_reason=$7 + WHERE id=$1"; + + let skip_reason = (*state == MachineValidationAttemptState::Skipped) + .then(|| stdout_summary.or(stderr_summary).unwrap_or_default()); + let failure_reason = (*state == MachineValidationAttemptState::Failed) + .then(|| stderr_summary.or(stdout_summary).unwrap_or_default()); + + sqlx::query(QUERY) + .bind(run_item_id) + .bind(run_item_state(state).to_string()) + .bind(INITIAL_ATTEMPT_NUMBER) + .bind(result.start_time) + .bind(result.end_time) + .bind(skip_reason) + .bind(failure_reason) + .execute(txn) + .await + .map_err(|e| DatabaseError::query(QUERY, e))?; + Ok(()) +} + +fn result_test_id(result: &MachineValidationResult) -> String { + result + .test_id + .clone() + .unwrap_or_else(|| machine_validation_suites::generate_test_id(&result.name)) +} + +fn state_from_result(result: &MachineValidationResult) -> MachineValidationAttemptState { + if result.exit_code == 0 && result.stdout.trim_start().starts_with("Skipped") { + MachineValidationAttemptState::Skipped + } else if result.exit_code == 0 { + MachineValidationAttemptState::Success + } else { + MachineValidationAttemptState::Failed + } +} + +fn run_item_state(state: &MachineValidationAttemptState) -> MachineValidationRunItemState { + match state { + MachineValidationAttemptState::Pending => MachineValidationRunItemState::Pending, + MachineValidationAttemptState::Running => MachineValidationRunItemState::Running, + MachineValidationAttemptState::Success => MachineValidationRunItemState::Success, + MachineValidationAttemptState::Skipped => MachineValidationRunItemState::Skipped, + MachineValidationAttemptState::Failed => MachineValidationRunItemState::Failed, + } +} + +fn truncate_summary(value: &str) -> Option { + if value.is_empty() { + None + } else { + Some(value.chars().take(SUMMARY_LIMIT).collect()) + } +} diff --git a/crates/api-model/src/machine_validation.rs b/crates/api-model/src/machine_validation.rs index 788a1eba8a..57c555722f 100644 --- a/crates/api-model/src/machine_validation.rs +++ b/crates/api-model/src/machine_validation.rs @@ -18,7 +18,9 @@ use std::fmt::{Debug, Display}; use std::str::FromStr; use carbide_uuid::machine::MachineId; -use carbide_uuid::machine_validation::MachineValidationId; +use carbide_uuid::machine_validation::{ + MachineValidationAttemptId, MachineValidationId, MachineValidationRunItemId, +}; use chrono::{DateTime, Utc}; use config_version::ConfigVersion; use serde::{Deserialize, Serialize}; @@ -114,6 +116,51 @@ pub struct MachineValidationStatus { pub completed: i32, } +#[derive(Debug, Clone, PartialEq, Eq, Default, strum_macros::EnumString)] +pub enum MachineValidationRunItemState { + #[default] + Pending, + Running, + Success, + Skipped, + Failed, +} + +impl Display for MachineValidationRunItemState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(self, f) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Default, strum_macros::EnumString)] +pub enum MachineValidationAttemptState { + #[default] + Pending, + Running, + Success, + Skipped, + Failed, +} + +impl Display for MachineValidationAttemptState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(self, f) + } +} + +fn decode_state(raw: String, column: &'static str) -> Result +where + T: FromStr, + T::Err: Display, +{ + T::from_str(&raw).map_err(|err| { + sqlx::Error::Decode(Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("invalid {column}: {raw} ({err})"), + ))) + }) +} + #[derive(Debug, Clone)] pub struct MachineValidation { pub id: MachineValidationId, @@ -156,6 +203,104 @@ impl<'r> FromRow<'r, PgRow> for MachineValidation { } } +#[derive(Debug, Clone)] +pub struct MachineValidationRunItem { + pub id: MachineValidationRunItemId, + pub run_id: MachineValidationId, + pub current_attempt_id: Option, + pub test_id: String, + pub test_version: Option, + pub display_name: String, + pub context: String, + pub component: Option, + pub state: MachineValidationRunItemState, + pub order_index: i32, + pub attempt: i32, + pub max_attempts: i32, + pub timeout_seconds: i64, + pub started_at: Option>, + pub ended_at: Option>, + pub last_heartbeat_at: Option>, + pub skip_reason: Option, + pub failure_reason: Option, +} + +impl<'r> FromRow<'r, PgRow> for MachineValidationRunItem { + fn from_row(row: &'r PgRow) -> Result { + let state_raw: String = row.try_get("state")?; + + Ok(MachineValidationRunItem { + id: row.try_get("id")?, + run_id: row.try_get("run_id")?, + current_attempt_id: match row + .try_get::, _>("current_attempt_id") + { + Ok(value) => value, + Err(sqlx::Error::ColumnNotFound(_)) => None, + Err(err) => return Err(err), + }, + test_id: row.try_get("test_id")?, + test_version: row.try_get("test_version")?, + display_name: row.try_get("display_name")?, + context: row.try_get("context")?, + component: row.try_get("component")?, + state: decode_state(state_raw, "machine_validation_run_items.state")?, + order_index: row.try_get("order_index")?, + attempt: row.try_get("attempt")?, + max_attempts: row.try_get("max_attempts")?, + timeout_seconds: row.try_get("timeout_seconds")?, + started_at: row.try_get("started_at")?, + ended_at: row.try_get("ended_at")?, + last_heartbeat_at: row.try_get("last_heartbeat_at")?, + skip_reason: row.try_get("skip_reason")?, + failure_reason: row.try_get("failure_reason")?, + }) + } +} + +#[derive(Debug, Clone)] +pub struct MachineValidationAttempt { + pub id: MachineValidationAttemptId, + pub run_item_id: MachineValidationRunItemId, + pub attempt_number: i32, + pub state: MachineValidationAttemptState, + pub command: Option, + pub args: Option, + pub container_image: Option, + pub execute_in_host: Option, + pub exit_code: Option, + pub failure_classification: Option, + pub started_at: Option>, + pub ended_at: Option>, + pub last_heartbeat_at: Option>, + pub stdout_summary: Option, + pub stderr_summary: Option, +} + +impl<'r> FromRow<'r, PgRow> for MachineValidationAttempt { + fn from_row(row: &'r PgRow) -> Result { + let state_raw: String = row.try_get("state")?; + + Ok(MachineValidationAttempt { + id: row.try_get("id")?, + run_item_id: row.try_get("run_item_id")?, + attempt_number: row.try_get("attempt_number")?, + state: decode_state(state_raw, "machine_validation_attempts.state")?, + command: row.try_get("command")?, + args: row.try_get("args")?, + container_image: row.try_get("container_image")?, + execute_in_host: row.try_get("execute_in_host")?, + exit_code: row.try_get("exit_code")?, + failure_classification: row.try_get("failure_classification")?, + started_at: row.try_get("started_at")?, + ended_at: row.try_get("ended_at")?, + last_heartbeat_at: row.try_get("last_heartbeat_at")?, + stdout_summary: row.try_get("stdout_summary")?, + stderr_summary: row.try_get("stderr_summary")?, + }) + } +} + #[derive(Debug, Deserialize, Clone, Serialize)] pub struct MachineValidationExternalConfig { pub name: String, @@ -391,6 +536,134 @@ mod tests { ); } + #[test] + fn run_item_state_from_str_parses_every_variant_and_rejects_the_rest() { + scenarios!( + run = |s| MachineValidationRunItemState::from_str(s).map_err(drop); + "Pending" { + "Pending" => Yields(MachineValidationRunItemState::Pending), + } + + "Running" { + "Running" => Yields(MachineValidationRunItemState::Running), + } + + "Success" { + "Success" => Yields(MachineValidationRunItemState::Success), + } + + "Skipped" { + "Skipped" => Yields(MachineValidationRunItemState::Skipped), + } + + "Failed" { + "Failed" => Yields(MachineValidationRunItemState::Failed), + } + + "empty string" { + "" => Fails, + } + + "unknown variant" { + "Started" => Fails, + } + + "lowercase is not accepted" { + "pending" => Fails, + } + ); + } + + #[test] + fn run_item_state_display_renders_the_variant_name() { + value_scenarios!( + run = |state| state.to_string(); + "Pending" { + MachineValidationRunItemState::Pending => "Pending".to_string(), + } + + "Running" { + MachineValidationRunItemState::Running => "Running".to_string(), + } + + "Success" { + MachineValidationRunItemState::Success => "Success".to_string(), + } + + "Skipped" { + MachineValidationRunItemState::Skipped => "Skipped".to_string(), + } + + "Failed" { + MachineValidationRunItemState::Failed => "Failed".to_string(), + } + ); + } + + #[test] + fn attempt_state_from_str_parses_every_variant_and_rejects_the_rest() { + scenarios!( + run = |s| MachineValidationAttemptState::from_str(s).map_err(drop); + "Pending" { + "Pending" => Yields(MachineValidationAttemptState::Pending), + } + + "Running" { + "Running" => Yields(MachineValidationAttemptState::Running), + } + + "Success" { + "Success" => Yields(MachineValidationAttemptState::Success), + } + + "Skipped" { + "Skipped" => Yields(MachineValidationAttemptState::Skipped), + } + + "Failed" { + "Failed" => Yields(MachineValidationAttemptState::Failed), + } + + "empty string" { + "" => Fails, + } + + "unknown variant" { + "Started" => Fails, + } + + "lowercase is not accepted" { + "pending" => Fails, + } + ); + } + + #[test] + fn attempt_state_display_renders_the_variant_name() { + value_scenarios!( + run = |state| state.to_string(); + "Pending" { + MachineValidationAttemptState::Pending => "Pending".to_string(), + } + + "Running" { + MachineValidationAttemptState::Running => "Running".to_string(), + } + + "Success" { + MachineValidationAttemptState::Success => "Success".to_string(), + } + + "Skipped" { + MachineValidationAttemptState::Skipped => "Skipped".to_string(), + } + + "Failed" { + MachineValidationAttemptState::Failed => "Failed".to_string(), + } + ); + } + #[test] fn state_default_is_started() { Check { diff --git a/crates/machine-controller/src/handler/machine_validation.rs b/crates/machine-controller/src/handler/machine_validation.rs index 06928a4392..79f975d0cd 100644 --- a/crates/machine-controller/src/handler/machine_validation.rs +++ b/crates/machine-controller/src/handler/machine_validation.rs @@ -51,7 +51,7 @@ async fn skip_machine_validation( tracing::info!( %machine_id, machine_validation_id = %validation_id, - "skipped machine validation completion ignored because run is no longer active" + "machine validation completion ignored because run is no longer active" ); return Ok(StateHandlerOutcome::do_nothing().with_txn(txn)); } diff --git a/crates/machine-validation/src/lib.rs b/crates/machine-validation/src/lib.rs index f452025d06..4a339644a3 100644 --- a/crates/machine-validation/src/lib.rs +++ b/crates/machine-validation/src/lib.rs @@ -160,7 +160,8 @@ impl MachineValidationManager { ..rpc::forge::MachineValidationRunRequest::default() }; let mut expected_time_duration = 0; - for test in tests.clone() { + let mut selected_tests = Vec::new(); + for test in &tests { if !machine_validation_filter.allowed_tests.is_empty() && !machine_validation_filter .allowed_tests @@ -171,7 +172,9 @@ impl MachineValidationManager { } run_request.total += 1; expected_time_duration += test.timeout.unwrap_or(7200); + selected_tests.push(test.clone()); } + run_request.selected_tests = selected_tests; run_request.duration_to_complete = Some(rpc::Duration::from( std::time::Duration::from_secs(expected_time_duration as u64), )); diff --git a/crates/rpc/build.rs b/crates/rpc/build.rs index aad53fb881..661f7202cc 100644 --- a/crates/rpc/build.rs +++ b/crates/rpc/build.rs @@ -541,6 +541,19 @@ fn main() -> Result<(), Box> { .type_attribute("MachineValidationResult", "#[derive(serde::Serialize)]") .type_attribute("MachineValidationRunList", "#[derive(serde::Serialize)]") .type_attribute("MachineValidationRun", "#[derive(serde::Serialize)]") + .type_attribute( + "MachineValidationRunItemList", + "#[derive(serde::Serialize)]", + ) + .type_attribute( + "MachineValidationRunItemIdList", + "#[derive(serde::Serialize)]", + ) + .type_attribute("MachineValidationRunItem", "#[derive(serde::Serialize)]") + .type_attribute( + "MachineValidationAttempt", + "#[derive(serde::Serialize)]", + ) .type_attribute("ExpectedHostNic", "#[derive(serde::Serialize)]") .type_attribute("ExpectedHostNic", "#[derive(serde::Deserialize)]") .type_attribute("HostLifecycleProfile", "#[derive(serde::Serialize, serde::Deserialize)]") @@ -1009,6 +1022,10 @@ fn main() -> Result<(), Box> { ".common.IpxeTemplateId", "::carbide_uuid::ipxe_template::IpxeTemplateId", ), + ( + ".common.MachineValidationId", + "::carbide_uuid::machine_validation::MachineValidationId", + ), (".common.RackId", "::carbide_uuid::rack::RackId"), ( ".common.RackProfileId", diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index 085815f7f8..47d058f751 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -605,6 +605,15 @@ service Forge { // Machine-Validation executed list rpc GetMachineValidationRuns(MachineValidationRunListGetRequest) returns (MachineValidationRunList); + // Machine-Validation run item IDs + rpc FindMachineValidationRunItemIds(MachineValidationRunItemSearchFilter) returns (MachineValidationRunItemIdList); + + // Machine-Validation run items by IDs + rpc FindMachineValidationRunItemsByIds(MachineValidationRunItemsByIdsRequest) returns (MachineValidationRunItemList); + + // Machine-Validation attempt detail + rpc GetMachineValidationAttempt(MachineValidationAttemptGetRequest) returns (MachineValidationAttempt); + // Remove ExternalConfig rpc RemoveMachineValidationExternalConfig(RemoveMachineValidationExternalConfigRequest) returns (google.protobuf.Empty); // Machine-Validation test list @@ -6148,6 +6157,65 @@ message MachineValidationRunListGetRequest { bool include_history = 2; } +message MachineValidationRunItemSearchFilter { + common.MachineValidationId validation_id = 1; +} + +message MachineValidationRunItemIdList { + repeated common.UUID run_item_ids = 1; +} + +message MachineValidationRunItemsByIdsRequest { + repeated common.UUID run_item_ids = 1; +} + +message MachineValidationRunItemList { + repeated MachineValidationRunItem run_items = 1; +} + +message MachineValidationRunItem { + common.UUID run_item_id = 1; + common.MachineValidationId validation_id = 2; + string test_id = 3; + optional string test_version = 4; + string display_name = 5; + string context = 6; + optional string component = 7; + string state = 8; + uint32 order_index = 9; + uint32 attempt = 10; + uint32 max_attempts = 11; + google.protobuf.Duration timeout = 12; + google.protobuf.Timestamp started_at = 13; + google.protobuf.Timestamp ended_at = 14; + google.protobuf.Timestamp last_heartbeat_at = 15; + optional string skip_reason = 16; + optional string failure_reason = 17; + common.UUID current_attempt_id = 18; +} + +message MachineValidationAttemptGetRequest { + common.UUID attempt_id = 1; +} + +message MachineValidationAttempt { + common.UUID attempt_id = 1; + common.UUID run_item_id = 2; + uint32 attempt_number = 3; + string state = 4; + optional string command = 5; + optional string args = 6; + optional string container_image = 7; + optional bool execute_in_host = 8; + optional int32 exit_code = 9; + optional string failure_classification = 10; + google.protobuf.Timestamp started_at = 11; + google.protobuf.Timestamp ended_at = 12; + google.protobuf.Timestamp last_heartbeat_at = 13; + optional string stdout_summary = 14; + optional string stderr_summary = 15; +} + message IsBmcInManagedHostResponse { bool in_managed_host = 1; } @@ -6274,6 +6342,7 @@ message MachineValidationRunRequest { common.MachineValidationId validation_id = 1; google.protobuf.Duration duration_to_complete = 2; uint32 total = 3; + repeated MachineValidationTest selected_tests = 4; } message MachineValidationRunResponse { string message = 1; diff --git a/crates/rpc/src/model/machine_validation.rs b/crates/rpc/src/model/machine_validation.rs index 832b2734cd..94b3b91e1f 100644 --- a/crates/rpc/src/model/machine_validation.rs +++ b/crates/rpc/src/model/machine_validation.rs @@ -19,10 +19,10 @@ use std::str::FromStr; use chrono::{DateTime, Utc}; use config_version::ConfigVersion; use model::machine_validation::{ - MachineValidation, MachineValidationExternalConfig, MachineValidationResult, - MachineValidationState, MachineValidationTest, MachineValidationTestAddRequest, - MachineValidationTestUpdatePayload, MachineValidationTestUpdateRequest, - MachineValidationTestsGetRequest, + MachineValidation, MachineValidationAttempt, MachineValidationExternalConfig, + MachineValidationResult, MachineValidationRunItem, MachineValidationState, + MachineValidationTest, MachineValidationTestAddRequest, MachineValidationTestUpdatePayload, + MachineValidationTestUpdateRequest, MachineValidationTestsGetRequest, }; use crate as rpc; @@ -165,6 +165,63 @@ impl From for rpc::forge::MachineValidationRun { } } +impl From for rpc::forge::MachineValidationRunItem { + fn from(value: MachineValidationRunItem) -> Self { + rpc::forge::MachineValidationRunItem { + run_item_id: Some(rpc::common::Uuid { + value: value.id.to_string(), + }), + current_attempt_id: value.current_attempt_id.map(|id| rpc::common::Uuid { + value: id.to_string(), + }), + validation_id: Some(value.run_id), + test_id: value.test_id, + test_version: value.test_version, + display_name: value.display_name, + context: value.context, + component: value.component, + state: value.state.to_string(), + order_index: value.order_index.try_into().unwrap_or(0), + attempt: value.attempt.try_into().unwrap_or(0), + max_attempts: value.max_attempts.try_into().unwrap_or(0), + timeout: Some(rpc::Duration::from(std::time::Duration::from_secs( + value.timeout_seconds.try_into().unwrap_or(0), + ))), + started_at: value.started_at.map(Into::into), + ended_at: value.ended_at.map(Into::into), + last_heartbeat_at: value.last_heartbeat_at.map(Into::into), + skip_reason: value.skip_reason, + failure_reason: value.failure_reason, + } + } +} + +impl From for rpc::forge::MachineValidationAttempt { + fn from(value: MachineValidationAttempt) -> Self { + rpc::forge::MachineValidationAttempt { + attempt_id: Some(rpc::common::Uuid { + value: value.id.to_string(), + }), + run_item_id: Some(rpc::common::Uuid { + value: value.run_item_id.to_string(), + }), + attempt_number: value.attempt_number.try_into().unwrap_or(0), + state: value.state.to_string(), + command: value.command, + args: value.args, + container_image: value.container_image, + execute_in_host: value.execute_in_host, + exit_code: value.exit_code, + failure_classification: value.failure_classification, + started_at: value.started_at.map(Into::into), + ended_at: value.ended_at.map(Into::into), + last_heartbeat_at: value.last_heartbeat_at.map(Into::into), + stdout_summary: value.stdout_summary, + stderr_summary: value.stderr_summary, + } + } +} + impl From for rpc::forge::MachineValidationExternalConfig { fn from(value: MachineValidationExternalConfig) -> Self { rpc::forge::MachineValidationExternalConfig { @@ -312,8 +369,17 @@ impl TryFrom for MachineValidationResult { #[cfg(test)] mod tests { + use carbide_uuid::machine_validation::{ + MachineValidationAttemptId, MachineValidationId, MachineValidationRunItemId, + }; + use model::machine_validation::{MachineValidationAttemptState, MachineValidationRunItemState}; + use super::*; + fn id(value: &str) -> uuid::Uuid { + uuid::Uuid::parse_str(value).unwrap() + } + #[test] fn tests_get_request_from_rpc() { let rpc_req = rpc::forge::MachineValidationTestsGetRequest { @@ -365,4 +431,368 @@ mod tests { assert_eq!(payload.is_enabled, Some(false)); assert!(payload.name.is_none()); } + + #[test] + fn run_item_from_model_maps_populated_and_sparse_values() { + struct Case { + name: &'static str, + item: MachineValidationRunItem, + has_current_attempt: bool, + has_test_version: bool, + has_component: bool, + has_started_at: bool, + has_ended_at: bool, + has_last_heartbeat_at: bool, + has_skip_reason: bool, + has_failure_reason: bool, + } + + let cases = [ + Case { + name: "populated", + item: MachineValidationRunItem { + id: MachineValidationRunItemId::from(id( + "10000000-0000-0000-0000-000000000001", + )), + run_id: MachineValidationId::from(id("20000000-0000-0000-0000-000000000001")), + current_attempt_id: Some(MachineValidationAttemptId::from(id( + "30000000-0000-0000-0000-000000000001", + ))), + test_id: "test-a".to_string(), + test_version: Some("1".to_string()), + display_name: "Test A".to_string(), + context: "OnDemand".to_string(), + component: Some("GPU".to_string()), + state: MachineValidationRunItemState::Running, + order_index: 2, + attempt: 1, + max_attempts: 3, + timeout_seconds: 90, + started_at: DateTime::::from_timestamp(10, 0), + ended_at: DateTime::::from_timestamp(20, 0), + last_heartbeat_at: DateTime::::from_timestamp(15, 0), + skip_reason: Some("skipped".to_string()), + failure_reason: Some("failed".to_string()), + }, + has_current_attempt: true, + has_test_version: true, + has_component: true, + has_started_at: true, + has_ended_at: true, + has_last_heartbeat_at: true, + has_skip_reason: true, + has_failure_reason: true, + }, + Case { + name: "sparse", + item: MachineValidationRunItem { + id: MachineValidationRunItemId::from(id( + "10000000-0000-0000-0000-000000000002", + )), + run_id: MachineValidationId::from(id("20000000-0000-0000-0000-000000000002")), + current_attempt_id: None, + test_id: "test-b".to_string(), + test_version: None, + display_name: "Test B".to_string(), + context: "Discovery".to_string(), + component: None, + state: MachineValidationRunItemState::Pending, + order_index: 0, + attempt: 0, + max_attempts: 1, + timeout_seconds: 0, + started_at: None, + ended_at: None, + last_heartbeat_at: None, + skip_reason: None, + failure_reason: None, + }, + has_current_attempt: false, + has_test_version: false, + has_component: false, + has_started_at: false, + has_ended_at: false, + has_last_heartbeat_at: false, + has_skip_reason: false, + has_failure_reason: false, + }, + ]; + + for case in cases { + let item = case.item.clone(); + let rpc_item = rpc::forge::MachineValidationRunItem::from(item.clone()); + + assert_eq!( + rpc_item.run_item_id.unwrap().value, + item.id.to_string(), + "{}", + case.name + ); + assert_eq!( + rpc_item.validation_id.unwrap().to_string(), + item.run_id.to_string(), + "{}", + case.name + ); + assert_eq!( + rpc_item.current_attempt_id.is_some(), + case.has_current_attempt, + "{}", + case.name + ); + assert_eq!(rpc_item.test_id, item.test_id, "{}", case.name); + assert_eq!( + rpc_item.test_version.is_some(), + case.has_test_version, + "{}", + case.name + ); + assert_eq!(rpc_item.display_name, item.display_name, "{}", case.name); + assert_eq!(rpc_item.context, item.context, "{}", case.name); + assert_eq!( + rpc_item.component.is_some(), + case.has_component, + "{}", + case.name + ); + assert_eq!(rpc_item.state, item.state.to_string(), "{}", case.name); + assert_eq!( + rpc_item.order_index, + u32::try_from(item.order_index).unwrap(), + "{}", + case.name + ); + assert_eq!( + rpc_item.attempt, + u32::try_from(item.attempt).unwrap(), + "{}", + case.name + ); + assert_eq!( + rpc_item.max_attempts, + u32::try_from(item.max_attempts).unwrap(), + "{}", + case.name + ); + assert_eq!( + rpc_item.timeout.unwrap().seconds, + item.timeout_seconds, + "{}", + case.name + ); + assert_eq!( + rpc_item.started_at.is_some(), + case.has_started_at, + "{}", + case.name + ); + assert_eq!( + rpc_item.ended_at.is_some(), + case.has_ended_at, + "{}", + case.name + ); + assert_eq!( + rpc_item.last_heartbeat_at.is_some(), + case.has_last_heartbeat_at, + "{}", + case.name + ); + assert_eq!( + rpc_item.skip_reason.is_some(), + case.has_skip_reason, + "{}", + case.name + ); + assert_eq!( + rpc_item.failure_reason.is_some(), + case.has_failure_reason, + "{}", + case.name + ); + } + } + + #[test] + fn attempt_from_model_maps_populated_and_sparse_values() { + struct Case { + name: &'static str, + attempt: MachineValidationAttempt, + has_command: bool, + has_args: bool, + has_container_image: bool, + has_execute_in_host: bool, + has_exit_code: bool, + has_failure_classification: bool, + has_started_at: bool, + has_ended_at: bool, + has_last_heartbeat_at: bool, + has_stdout_summary: bool, + has_stderr_summary: bool, + } + + let cases = [ + Case { + name: "populated", + attempt: MachineValidationAttempt { + id: MachineValidationAttemptId::from(id( + "30000000-0000-0000-0000-000000000002", + )), + run_item_id: MachineValidationRunItemId::from(id( + "10000000-0000-0000-0000-000000000003", + )), + attempt_number: 2, + state: MachineValidationAttemptState::Success, + command: Some("/bin/test".to_string()), + args: Some("--verbose".to_string()), + container_image: Some("image:tag".to_string()), + execute_in_host: Some(true), + exit_code: Some(0), + failure_classification: Some("none".to_string()), + started_at: DateTime::::from_timestamp(30, 0), + ended_at: DateTime::::from_timestamp(40, 0), + last_heartbeat_at: DateTime::::from_timestamp(35, 0), + stdout_summary: Some("stdout".to_string()), + stderr_summary: Some("stderr".to_string()), + }, + has_command: true, + has_args: true, + has_container_image: true, + has_execute_in_host: true, + has_exit_code: true, + has_failure_classification: true, + has_started_at: true, + has_ended_at: true, + has_last_heartbeat_at: true, + has_stdout_summary: true, + has_stderr_summary: true, + }, + Case { + name: "sparse", + attempt: MachineValidationAttempt { + id: MachineValidationAttemptId::from(id( + "30000000-0000-0000-0000-000000000003", + )), + run_item_id: MachineValidationRunItemId::from(id( + "10000000-0000-0000-0000-000000000004", + )), + attempt_number: 1, + state: MachineValidationAttemptState::Pending, + command: None, + args: None, + container_image: None, + execute_in_host: None, + exit_code: None, + failure_classification: None, + started_at: None, + ended_at: None, + last_heartbeat_at: None, + stdout_summary: None, + stderr_summary: None, + }, + has_command: false, + has_args: false, + has_container_image: false, + has_execute_in_host: false, + has_exit_code: false, + has_failure_classification: false, + has_started_at: false, + has_ended_at: false, + has_last_heartbeat_at: false, + has_stdout_summary: false, + has_stderr_summary: false, + }, + ]; + + for case in cases { + let attempt = case.attempt.clone(); + let rpc_attempt = rpc::forge::MachineValidationAttempt::from(attempt.clone()); + + assert_eq!( + rpc_attempt.attempt_id.unwrap().value, + attempt.id.to_string(), + "{}", + case.name + ); + assert_eq!( + rpc_attempt.run_item_id.unwrap().value, + attempt.run_item_id.to_string(), + "{}", + case.name + ); + assert_eq!( + rpc_attempt.attempt_number, + u32::try_from(attempt.attempt_number).unwrap(), + "{}", + case.name + ); + assert_eq!( + rpc_attempt.state, + attempt.state.to_string(), + "{}", + case.name + ); + assert_eq!( + rpc_attempt.command.is_some(), + case.has_command, + "{}", + case.name + ); + assert_eq!(rpc_attempt.args.is_some(), case.has_args, "{}", case.name); + assert_eq!( + rpc_attempt.container_image.is_some(), + case.has_container_image, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.execute_in_host.is_some(), + case.has_execute_in_host, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.exit_code.is_some(), + case.has_exit_code, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.failure_classification.is_some(), + case.has_failure_classification, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.started_at.is_some(), + case.has_started_at, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.ended_at.is_some(), + case.has_ended_at, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.last_heartbeat_at.is_some(), + case.has_last_heartbeat_at, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.stdout_summary.is_some(), + case.has_stdout_summary, + "{}", + case.name + ); + assert_eq!( + rpc_attempt.stderr_summary.is_some(), + case.has_stderr_summary, + "{}", + case.name + ); + } + } } diff --git a/crates/uuid/src/machine_validation/mod.rs b/crates/uuid/src/machine_validation/mod.rs index 6514e1fda0..ff76ebff46 100644 --- a/crates/uuid/src/machine_validation/mod.rs +++ b/crates/uuid/src/machine_validation/mod.rs @@ -27,11 +27,53 @@ impl UuidSubtype for MachineValidationIdMarker { /// MachineValidationId is a strongly typed UUID for MachineValidations. pub type MachineValidationId = TypedUuid; +/// Marker type for MachineValidationRunItemId +pub struct MachineValidationRunItemIdMarker; + +impl UuidSubtype for MachineValidationRunItemIdMarker { + const TYPE_NAME: &'static str = "MachineValidationRunItemId"; +} + +/// MachineValidationRunItemId is a strongly typed UUID for validation run items. +pub type MachineValidationRunItemId = TypedUuid; + +/// Marker type for MachineValidationAttemptId +pub struct MachineValidationAttemptIdMarker; + +impl UuidSubtype for MachineValidationAttemptIdMarker { + const TYPE_NAME: &'static str = "MachineValidationAttemptId"; +} + +/// MachineValidationAttemptId is a strongly typed UUID for validation attempts. +pub type MachineValidationAttemptId = TypedUuid; + #[cfg(test)] -mod tests { +mod machine_validation_id_tests { use super::*; use crate::typed_uuid_tests; // Run all boilerplate TypedUuid tests for this type, also // ensuring TYPE_NAME and DB_COLUMN_NAME test correctly. typed_uuid_tests!(MachineValidationId, "MachineValidationId", "id"); } + +#[cfg(test)] +mod machine_validation_run_item_id_tests { + use super::*; + use crate::typed_uuid_tests; + typed_uuid_tests!( + MachineValidationRunItemId, + "MachineValidationRunItemId", + "id" + ); +} + +#[cfg(test)] +mod machine_validation_attempt_id_tests { + use super::*; + use crate::typed_uuid_tests; + typed_uuid_tests!( + MachineValidationAttemptId, + "MachineValidationAttemptId", + "id" + ); +}