From 6e79fe472dd4d29f598791155b7edb8cc1daf9c6 Mon Sep 17 00:00:00 2001 From: Alexander Dorn Date: Thu, 7 May 2026 09:13:20 +0200 Subject: [PATCH] fix(plan): stateless re-plan must not flag healthy host as recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `src/cli/plan.rs:131-135` and `src/cli/apply.rs:909-913` chose between `ApplyRunDisplayState::FirstRun` and `::Recovery` based on a binary heuristic on the observed snapshot: let run_display_state = if observed_snapshot.objects.is_empty() { ApplyRunDisplayState::FirstRun } else { ApplyRunDisplayState::Recovery }; In stateless mode (`--source-repo`), there is no `/var/lib/core-ops/` baseline by design (FR-013 / SC-009), so the controller cannot tell "host has units because a prior apply succeeded" from "host has units because a prior apply failed mid-flight." The heuristic therefore emitted `(recovery from failed initial apply)` in the rendered header on **every** stateless invocation against a host that has converged state — including immediately after a perfectly clean `Outcome: converged` apply. End-to-end this looked like: $ core-ops apply --source-repo examples/03-immich --host example Apply for host example @ (stateless) (first run) ... 10 created ... Outcome: converged $ core-ops plan --source-repo examples/03-immich --host example Plan for host example @ (stateless) (recovery from failed initial apply) [·] Unchanged • 10 Summary: 10 unchanged The "recovery from failed initial apply" tagline directly contradicts the immediately preceding `Outcome: converged` and "10 unchanged" signals, undercutting the demonstrated trustworthiness of the canonical `examples/03-immich` walkthrough that spec/018 ships in the README and `docs/onboarding.cast`. The comment immediately above the buggy heuristic stated the correct intent — "Treat as FirstRun for header-rendering purposes — the source path is always the primary identifier in the rendered header" — but the implementation diverged. This change makes the implementation match the comment's intent: in stateless mode the path-based provenance prefix (`(stateless)`) carries the relevant signal alone; the first-run / recovery suffix does not apply once the host has observed objects. Resolution: - Add `ApplyRunDisplayState::Stateless` (`src/cli/report.rs:33`). Renders no suffix beyond the version identifier; the caller's `(stateless)` provenance prefix is preserved. - Update both stateless call sites (`src/cli/plan.rs`, `src/cli/apply.rs`) to select `Stateless` when the host has observed objects, falling back to `FirstRun` when the host is empty (the latter case keeps "(first run)" as a useful initial- apply signal). - The init'd-mode helpers (`classify_plan_run_display_state`, `classify_apply_run_display_state`) are unchanged: their heuristic is correct because they DO consult `last_applied_revision`, which can disambiguate first-run / recovery / managed states. - Added unit test `tests/integration/test_apply_report.rs::stateless_run_display_state_omits_first_run_and_recovery_suffixes` locking in the new behavior. Verification on `core-ops-uat` (Fedora CoreOS guest): Empty host: $ core-ops plan --source-repo examples/03-immich --host example Plan for host example @ (stateless) (first run) $ sudo core-ops apply --source-repo examples/03-immich --host example Apply for host example @ (stateless) (first run) Converged host: $ core-ops plan --source-repo examples/03-immich --host example Plan for host example @ (stateless) [·] Unchanged • 10 Patch bump (modify under `src/`, `rust_source_surface` rule). Discovered while exercising the canonical Immich walkthrough for spec/018 session 3; the misleading "(recovery from failed initial apply)" landed in the spec/018 asciicast recording, directly undercutting the slice's "increase adoption likelihood" goal. $ cargo test 473 passed $ cargo clippy --all-targets -- -D warnings clean $ cargo run --bin core-ops-release -- validate --base-ref master passed (patch) Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 3 ++ Cargo.lock | 2 +- Cargo.toml | 2 +- changes/fix-stateless-plan-annotation.md | 7 +++ src/cli/apply.rs | 8 ++- src/cli/plan.rs | 15 ++++-- src/cli/report.rs | 13 +++++ .../provenance_state/valid-success.json | 2 +- tests/integration/test_apply_report.rs | 52 +++++++++++++++++++ 9 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 changes/fix-stateless-plan-annotation.md diff --git a/CHANGELOG.md b/CHANGELOG.md index f91934e..0f1e0b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ versioning for public release policy decisions. ## [Unreleased] +### Changed + +- Stateless `plan` and `apply` no longer flag a healthy host as "recovery from failed initial apply"; introduce `ApplyRunDisplayState::Stateless` so the rendered header carries only the `(stateless)` path-based provenance prefix when the host has converged objects but no `/var/lib/core-ops/` baseline. Empty-host invocations continue to render `(first run)` as before. ## [2.2.2] - 2026-05-07 diff --git a/Cargo.lock b/Cargo.lock index 3404b3f..826b824 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -157,7 +157,7 @@ checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "core-ops" -version = "2.2.2" +version = "2.2.3" dependencies = [ "clap", "libc", diff --git a/Cargo.toml b/Cargo.toml index 1886bcb..5a749f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "core-ops" -version = "2.2.2" +version = "2.2.3" edition = "2021" license = "AGPL-3.0-or-later" diff --git a/changes/fix-stateless-plan-annotation.md b/changes/fix-stateless-plan-annotation.md new file mode 100644 index 0000000..51457fd --- /dev/null +++ b/changes/fix-stateless-plan-annotation.md @@ -0,0 +1,7 @@ +--- +change_id: fix-stateless-plan-annotation +release_intent: patch +summary: Stateless `plan` and `apply` no longer flag a healthy host as "recovery from failed initial apply"; introduce `ApplyRunDisplayState::Stateless` so the rendered header carries only the `(stateless)` path-based provenance prefix when the host has converged objects but no `/var/lib/core-ops/` baseline. Empty-host invocations continue to render `(first run)` as before. +scope: cli +release_preparation: false +--- diff --git a/src/cli/apply.rs b/src/cli/apply.rs index bd6f3e9..73e8808 100644 --- a/src/cli/apply.rs +++ b/src/cli/apply.rs @@ -906,10 +906,16 @@ pub fn apply_with_report_stateless( { deterministic.scope_id = scope_id.clone(); } + // Stateless mode: empty host → FirstRun (informative for an actual + // initial apply); non-empty host → Stateless (suppress the + // misleading "recovery from failed initial apply" suffix — the + // controller cannot distinguish a successful prior apply from a + // failed one in stateless mode). The `(stateless)` path-based + // provenance prefix carries the operationally relevant signal. let run_display_state = if observed_snapshot.objects.is_empty() { ApplyRunDisplayState::FirstRun } else { - ApplyRunDisplayState::Recovery + ApplyRunDisplayState::Stateless }; let human_report = format_apply_output_report( &deterministic, diff --git a/src/cli/plan.rs b/src/cli/plan.rs index a5f5f4a..2f13953 100644 --- a/src/cli/plan.rs +++ b/src/cli/plan.rs @@ -125,13 +125,20 @@ pub fn plan_stateless( verify_state(&result.desired, &observed), ); // Stateless mode has no last_applied baseline by design (init'd - // state is never read, FR-013 / SC-009). Treat as FirstRun for - // header-rendering purposes — the source path is always the - // primary identifier in the rendered header. + // state is never read, FR-013 / SC-009). When the host is empty, + // "(first run)" is still useful and accurate; the user is about + // to create everything from scratch. When the host already has + // observed objects we cannot safely tag the run as "recovery + // from failed initial apply" — there is no record distinguishing + // "host has units because a prior apply succeeded" from "host + // has units because a prior apply failed mid-flight." Fall back + // to the dedicated `Stateless` variant in that case so the + // path-based provenance prefix (`(stateless)`) carries the + // operationally relevant signal alone. let run_display_state = if observed_snapshot.objects.is_empty() { ApplyRunDisplayState::FirstRun } else { - ApplyRunDisplayState::Recovery + ApplyRunDisplayState::Stateless }; let mut deterministic = reconcile_deterministic_plan_with_runtime( &desired_snapshot, diff --git a/src/cli/report.rs b/src/cli/report.rs index 63b6f1f..da1c889 100644 --- a/src/cli/report.rs +++ b/src/cli/report.rs @@ -34,6 +34,13 @@ pub enum ApplyRunDisplayState { Managed, FirstRun, Recovery, + /// Stateless mode (`--source-repo`): no `/var/lib/core-ops/` baseline is + /// read or written, so the controller cannot distinguish "host has units + /// because a prior apply succeeded" from "host has units because a prior + /// apply failed mid-flight." The first-run / recovery suffix does not + /// apply; the path-based provenance prefix (e.g. `(stateless)`) carries + /// the operationally relevant signal. + Stateless, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -1997,6 +2004,9 @@ fn apply_header_revision_context( crate::cli::status::render_revision_with_requested_ref(target, requested_ref) ) } + ApplyRunDisplayState::Stateless => { + crate::cli::status::render_revision_with_requested_ref(target, requested_ref) + } ApplyRunDisplayState::Managed => match previous { Some(previous) if previous != target => { format!( @@ -2659,6 +2669,9 @@ fn plan_header_revision_context_with_state( "{} (recovery from failed initial apply)", crate::cli::status::render_revision_with_requested_ref(target, requested_ref) ), + (_, ApplyRunDisplayState::Stateless) => { + crate::cli::status::render_revision_with_requested_ref(target, requested_ref) + } (None, ApplyRunDisplayState::Managed) => { crate::cli::status::render_revision_with_requested_ref(target, requested_ref) } diff --git a/tests/fixtures/provenance_state/valid-success.json b/tests/fixtures/provenance_state/valid-success.json index 260358b..b74aa6c 100644 --- a/tests/fixtures/provenance_state/valid-success.json +++ b/tests/fixtures/provenance_state/valid-success.json @@ -1,7 +1,7 @@ { "schema_version": 1, "controller": { - "version": "2.2.2", + "version": "2.2.3", "revision": "8f3c2ab", "build_time": "2026-03-23T10:00:00Z", "tree_state": "clean" diff --git a/tests/integration/test_apply_report.rs b/tests/integration/test_apply_report.rs index 18bc284..6597483 100644 --- a/tests/integration/test_apply_report.rs +++ b/tests/integration/test_apply_report.rs @@ -294,6 +294,58 @@ fn apply_report_surfaces_verification_failures_for_unchanged_objects() { assert!(rendered.contains("Outcome: non-converging")); } +#[test] +fn stateless_run_display_state_omits_first_run_and_recovery_suffixes() { + // Stateless mode: regardless of whether the host has observed + // objects, the rendered header MUST NOT carry "(first run)" or + // "(recovery from failed initial apply)" suffixes — neither is + // distinguishable in stateless mode (no /var/lib/core-ops/ + // baseline is read or written). The path-based provenance prefix + // (e.g. `(stateless)` in the source-path identifier rendered by + // the caller) is the operationally relevant signal. + let plan_first = DeterministicReconciliationPlan { + desired_revision_id: Some("rev-2".to_string()), + baseline_revision_id: None, + requested_repository: None, + requested_ref: None, + last_applied_requested_repository: None, + last_applied_requested_ref: None, + scope_id: "host:alpha".to_string(), + actions: vec![DeterministicPlannedAction { + object_id: "frontend.container".to_string(), + classification: DeterministicActionClass::Update, + reason: "actual state diverged from desired snapshot".to_string(), + dependency_context: Vec::new(), + semantic_diff: Default::default(), + }], + drift_records: Vec::new(), + graph: SemanticDependencyGraph { + nodes: vec![SemanticDependencyNode { + object_id: "frontend.container".to_string(), + object_kind: ManagedObjectKind::QuadletResource, + ordering_key: "frontend.container".to_string(), + }], + edges: Vec::new(), + }, + }; + + let stateless_apply = format_apply_output_report( + &plan_first, + &[], + None, + ApplyHumanMode::Default, + ApplyRunDisplayState::Stateless, + ); + assert!( + !stateless_apply.contains("(first run)"), + "stateless apply header must not carry (first run): {stateless_apply}" + ); + assert!( + !stateless_apply.contains("(recovery from failed initial apply)"), + "stateless apply header must not carry (recovery from failed initial apply): {stateless_apply}" + ); +} + #[test] fn apply_streaming_report_emits_progress_then_terminal_lines() { let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner());