cozystack · Andrei Kvapil (kvaps) · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
diff --git a/docs/cli-parity-known-deltas.md b/docs/cli-parity-known-deltas.md
@@ -15,6 +15,7 @@ Row IDs match the command-catalogue indexes used by `cli-parity-refresh.sh` (see
 
 | # | command | delta_kind | accepted_until | why |
 |---|---------|------------|----------------|-----|
+| 03 | `sp l` | WIRE_SHAPE | permanent | ENV_TOPOLOGY + envelope-shape. After pool-name + table-border normalization the `sp l` listing still diverges because the BS stand and the upstream oracle are provisioned with DIFFERENT storage-pool topologies (BS carries `lvm-thin` / `zfs-thin` / `zfs-thick` rows the FILE_THIN-only oracle never had) and because of two deliberate wire-shape choices on the FILE_THIN pool: (a) BS leaves the FILE_THIN `PoolName` cell blank where the oracle renders its backing-dir path `/var/lib/linstor-oracle-<pool>` (BS's FILE_THIN provider has no operator-facing PoolName — same family as row 04's StorDriver omission); (b) BS leaves the `SharedName` column blank where the oracle stamps `<node>;<pool>`, and BS provisions a single shared `DfltDisklessStorPool` row rather than one per node. None of these are behavioural deltas — they reflect the stand's pool fixtures and BS's leaner FILE_THIN DTO; linstor-csi / piraeus-operator read pool capacity (`sp l -m`, row 79) not the PoolName/SharedName render columns. Whitelisted so a topology-shaped `sp l` diff does not block the refresh; a genuine capacity/driver regression still surfaces via `sp l -m` (row 79) and `sp l --show-props` (row 04). |
 | 04 | `sp l --show-props StorDriver/*` | WIRE_SHAPE | permanent | BLOCKSTOR_SUPERSET: BS surfaces extra `StorDriver/LvmVg`/`ThinPool` keys; client glob still matches → CLI render identical. Operator-visible parity OK. |
 | 08 | `r l` (tiebreaker layers DRBD,STORAGE) | WIRE_SHAPE | 2026-12-31 | F10 residual: DRBD layer enrichment (`may_promote`, `promotion_score`, `node_id`, `al_*`) not yet stamped. Audit refresh 2026-05-19 reclassifies F10 as partial — `drbdtop`-style monitoring depends on it but CSI does not. |
 | 18 | `controller version` | WIRE_SHAPE | permanent | Intentional version stamping: BS reports `1.33.2+ git=blockstor`. Downstream tooling MUST NOT grep a hex git_hash from BS. |
@@ -71,3 +72,4 @@ These rows are **NOT** whitelisted on purpose — they appear in the audit but b
 - 2026-05-14 — original one-shot audit `docs/cli-parity-audit-2026-05-14.md`.
 - 2026-05-19 — refresh `docs/cli-parity-audit-2026-05-19-refresh.md`; F1-F20 closed (F10 partial residual remains as accepted delta #08); L7 harness `261d9e32f` lands re-runnable cli-parity-refresh.sh as the going-forward audit driver.
 - 2026-06-14 — reclassified two cli-matrix cells from "bug" to intentional BS contract: row 84 (tiebreaker→diskful promotion full-syncs by design, `anyDiskfulPeerHasData` gate avoids the UUID-mint StandAlone wedge) and row 85 (bare `SyncSource`/`SyncTarget` literal is the intended shape when `OutOfSyncKib<=0`). Cells `r-c-over-tiebreaker-skip-sync.sh` and `r-l-conns-shapes.sh` now assert the BS-intended behaviour.
+- 2026-06-14 — extended `cli-parity-refresh.sh`'s `normalize_side_output` to collapse ASCII-table border-width drift (pool-name length sets column width) and the upstream-only `DfltRscGrp PlaceCount` render, clearing the `rg l` (05) and `rg l --pastable` (20) false WIRE_SHAPE diffs → both now PARITY. The residual `sp l` (03) divergence is a genuine ENV_TOPOLOGY + FILE_THIN envelope-shape delta and is whitelisted as row 03 (not masked).
diff --git a/tests/burnin-blockstor.sh b/tests/burnin-blockstor.sh
@@ -135,20 +135,46 @@ EOF
     fi
 
     DEV=$(on_node "$PRIMARY" bash -c "grep -oE '/dev/drbd[0-9]+' /etc/drbd.d/${RD}.res | head -1")
+
+    # Write 1 MiB urandom on PRIMARY and capture its md5, then read it back
+    # on PEER and compare. Each remote read GUARDS the pipeline's exit code
+    # AND the byte count: under churn `dd` can fail to open /dev/drbdN
+    # (EAGAIN, device transiently busy) and read ZERO bytes; md5("") is a
+    # fixed digest, so an unguarded read produces a FALSE mismatch alarm that
+    # would drown out a real future divergence. The exit code comes from
+    # `set -o pipefail` INSIDE `$(...)`: PIPESTATUS never leaves the subshell.
+    # dd is piped STRAIGHT into md5sum (never via a shell variable — command
+    # substitution strips NUL bytes, which a binary 1 MiB read is full of).
+    # On a guarded read failure the snippet emits the sentinel "READFAIL";
+    # the iteration's compare is then SKIPPED (not counted as a real FAIL) and
+    # re-tried next iteration. `bs=1M count=1` reads 1048576 bytes on success.
+    EXPECT_BYTES=1048576
     PRIMARY_MD5=$(on_node "$PRIMARY" bash -c "
         drbdadm primary ${RD}
         dd if=/dev/urandom of=${DEV} bs=1M count=1 status=none oflag=direct
-        dd if=${DEV} bs=1M count=1 status=none iflag=direct | md5sum | awk '{print \$1}'
+        md5=\$(set -o pipefail; dd if=${DEV} bs=1M count=1 iflag=direct 2>/tmp/burnin-dd-primary.err | md5sum | awk '{print \$1}')
+        rc=\$?
+        n=\$(awk '/bytes/ {print \$1; exit}' /tmp/burnin-dd-primary.err)
         drbdadm secondary ${RD}
+        if [ \"\$rc\" -ne 0 ] || [ \"\$n\" != \"${EXPECT_BYTES}\" ]; then echo 'READFAIL'; else echo \"\$md5\"; fi
     " | tail -1)
 
     PEER_MD5=$(on_node "$PEER" bash -c "
         drbdadm primary ${RD}
-        dd if=${DEV} bs=1M count=1 status=none iflag=direct | md5sum | awk '{print \$1}'
+        md5=\$(set -o pipefail; dd if=${DEV} bs=1M count=1 iflag=direct 2>/tmp/burnin-dd-peer.err | md5sum | awk '{print \$1}')
+        rc=\$?
+        n=\$(awk '/bytes/ {print \$1; exit}' /tmp/burnin-dd-peer.err)
         drbdadm secondary ${RD}
+        if [ \"\$rc\" -ne 0 ] || [ \"\$n\" != \"${EXPECT_BYTES}\" ]; then echo 'READFAIL'; else echo \"\$md5\"; fi
     " | tail -1)
 
-    if [[ "$PRIMARY_MD5" == "$PEER_MD5" ]]; then
+    if [[ "$PRIMARY_MD5" == "READFAIL" || "$PEER_MD5" == "READFAIL" \
+          || -z "$PRIMARY_MD5" || -z "$PEER_MD5" ]]; then
+        # Transient read failure on at least one side — neither PASS nor
+        # FAIL. A real mismatch is only credible when BOTH reads succeeded
+        # and returned the full 1 MiB.
+        echo "[$(date -u +%FT%TZ)] iter=$ITER SKIP: transient dd read failure (primary='$PRIMARY_MD5' peer='$PEER_MD5'); not comparing"
+    elif [[ "$PRIMARY_MD5" == "$PEER_MD5" ]]; then
         PASS=$((PASS + 1))
     else
         FAIL=$((FAIL + 1))

diff --git a/tests/e2e/cli-matrix/bug-278-skipdisk-autoclear-after-reattach.sh b/tests/e2e/cli-matrix/bug-278-skipdisk-autoclear-after-reattach.sh
@@ -11,29 +11,53 @@
 # `drbdadm adjust --skip-disk` and the local volume stayed Diskless
 # even though the kernel was healthy.
 #
-# Bug 278 fix: when the reconciler sees `SkipDisk=True` AND the kernel
-# reports the local volume as non-Diskless (UpToDate / Inconsistent /
-# Outdated), the satellite releases the observer's SSA claim on the
-# SkipDisk key via SkipDiskClearer. The next dispatcher cycle
-# re-resolves Spec.Props without SkipDisk, the FSM transitions
-# PhaseSkipDisk → PhaseRunning, and the next reconcile dispatches plain
-# `drbdadm adjust` to re-attach the lower disk.
+# Bug 278 fix (pkg/satellite/reconciler.go runAdjust + SkipDiskClearer):
+# on EVERY reconcile, when the reconciler sees the observer-owned
+# `SkipDisk=True` stamp AND the kernel probe reports the local volume
+# as non-Diskless (HasDisklessVolume==false — UpToDate / Inconsistent /
+# Outdated, backing storage attached), the satellite releases the
+# observer's SSA claim on the SkipDisk key via SkipDiskClearer. The next
+# dispatcher cycle re-resolves Spec.Props without SkipDisk, the FSM
+# transitions PhaseSkipDisk → PhaseRunning, and the next reconcile
+# dispatches plain `drbdadm adjust` to keep the lower disk attached.
 #
-# Contract this cell pins:
+# Two-sided contract this cell pins (BUG-046 triage corrected the
+# previous revision — see below):
 #
-#   1. Steady-state: 2-replica diskful RD, both UpToDate.
-#   2. Stamp SkipDisk=True onto Resource.Spec.Props on $N2 (simulates
-#      the observer's defensive write on a transient Failed event the
-#      kernel emits at Talos upgrade time).
-#   3. Restart the satellite pod on $N2 (simulates Talos kernel
-#      restart). The pod reattaches, sees the SkipDisk prop, but
-#      kernel state on $N2 is healthy (disk:UpToDate, backing_dev
-#      present — the lower LV survived the OS upgrade).
-#   4. Within 60s assert: Spec.Props NO LONGER contains DrbdOptions/SkipDisk
-#      AND Status.volumes[0].diskState is back to UpToDate.
+#   A. PERSISTENCE (positive control). On a replica whose kernel volume
+#      really is Diskless (HasDisklessVolume==true), the auto-clear gate
+#      is INTENTIONALLY closed: a defensive SkipDisk stamp there MUST
+#      survive — that is the exact state SkipDisk is meant to guard. This
+#      proves the clearer is selective (it does not blindly wipe every
+#      stamp) and that the stamp wiring works at all.
 #
-# Pre-fix: SkipDisk stays pinned forever; the cell would FAIL at
-# step 4 because Spec.Props still carries the key after 60s.
+#   B. AUTO-CLEAR. On a HEALTHY (UpToDate) replica the satellite auto-
+#      clears the observer-owned defensive stamp, so the volume stays
+#      attached after a Talos-upgrade reattach. We restart the satellite
+#      pod on that node (the documented Talos kernel-restart shape), and
+#      assert the stamp is gone and the disk is UpToDate.
+#
+# Why the previous revision could never pass (BUG-046 triage): it stamped
+# SkipDisk on a HEALTHY UpToDate replica and then read the prop BACK as a
+# setup precondition (`assert stamped == "True"`). But the auto-clear
+# fires on EVERY reconcile of a healthy slot, so the satellite cleared
+# the stamp between the SSA apply and the read-back — the precondition
+# raced the product's correct behaviour and aborted with "stamp did not
+# land". The stamped-state on a healthy disk is transient BY DESIGN; the
+# cell now asserts the auto-clear directly (poll for ABSENCE, the steady
+# state) and uses the genuinely-persistent Diskless replica as the
+# positive control instead of a brittle read-back of a doomed stamp.
+#
+# The SSA stamp is applied under the OBSERVER's field manager
+# (`blockstor-satellite-skipdisk`, pkg/satellite/controllers/observer.go)
+# so it carries the exact SSA ownership the real defensive write has.
+# The auto-clear RELEASES that owner's claim — that is the whole product
+# contract: the satellite un-stamps only its own defensive writes, while
+# an operator-set SkipDisk (different field manager) would survive. The
+# apply doc carries the two +required immutable scalars
+# (resourceDefinitionName / nodeName) — same SSA-validation rule the
+# observer's writeSkipDiskProp follows; --force-conflicts mirrors the
+# observer's ForceOwnership.
 
 set -euo pipefail
 
@@ -44,7 +68,7 @@ SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=lib.sh
 source "$SCRIPT_DIR/lib.sh"
 
-require_workers 2
+require_workers 3
 
 linstor_cli_setup
 
@@ -59,56 +83,90 @@ trap cleanup EXIT
 
 N1=$WORKER_1
 N2=$WORKER_2
-
-echo ">> [Bug 278] 2-replica diskful RD on $N1+$N2"
-"${LCTL[@]}" resource-definition create "$RD" >/dev/null
-"${LCTL[@]}" volume-definition create "$RD" 128M >/dev/null
-"${LCTL[@]}" resource create "$N1" "$RD" --storage-pool=stand >/dev/null
-"${LCTL[@]}" resource create "$N2" "$RD" --storage-pool=stand >/dev/null
-
-echo ">> wait for both diskful UpToDate"
-RD="$RD" wait_uptodate "$RD" "$N1" "$N2"
-
-echo ">> stamp DrbdOptions/SkipDisk=True onto $N2 (simulates pre-upgrade defensive stamp)"
-# Server-side-apply under the OBSERVER's field manager
-# (`blockstor-satellite-skipdisk`, pkg/satellite/controllers/observer.go)
-# so the stamp carries the exact SSA ownership the real defensive write
-# has. The Bug 278 auto-clear RELEASES that owner's claim — that is the
-# whole product contract: the satellite un-stamps only its own
-# defensive writes, while an operator-set SkipDisk (different field
-# manager) survives the reattach. The previous revision of this cell
-# stamped via `kubectl patch --type=merge` (field manager
-# "kubectl-patch"), which the release path correctly refuses to clear —
-# the cell was asserting the opposite of the product contract and could
-# never pass (BUG-040 triage).
-#
-# The apply doc must carry the two +required immutable scalars
-# (resourceDefinitionName / nodeName) — same SSA-validation rule the
-# observer's writeSkipDiskProp follows.
-# --force-conflicts mirrors the observer's ForceOwnership: the apply
-# doc must claim the two required scalars even though the controller's
-# field manager owns them.
-kubectl apply --server-side --force-conflicts --field-manager=blockstor-satellite-skipdisk -f - <<EOF
+N3=$WORKER_3
+
+# stamp_skipdisk <node> — SSA-apply DrbdOptions/SkipDisk=True onto
+# (RD, NODE).Spec.Props under the observer's field manager, mirroring
+# the real defensive stamp's ownership shape.
+stamp_skipdisk() {
+    local node=$1
+    kubectl apply --server-side --force-conflicts \
+        --field-manager=blockstor-satellite-skipdisk -f - <<EOF >/dev/null
 apiVersion: blockstor.cozystack.io/v1alpha1
 kind: Resource
 metadata:
-  name: ${RD}.${N2}
+  name: ${RD}.${node}
 spec:
   resourceDefinitionName: ${RD}
-  nodeName: ${N2}
+  nodeName: ${node}
   props:
     DrbdOptions/SkipDisk: "True"
 EOF
+}
+
+# skipdisk_prop <node> — echo the current SkipDisk prop value on
+# (RD, NODE).Spec.Props ("True" or empty; also empty if kubectl fails).
+skipdisk_prop() {
+    local node=$1
+    kubectl get "resources.blockstor.cozystack.io/${RD}.${node}" \
+        -o jsonpath='{.spec.props.DrbdOptions/SkipDisk}' 2>/dev/null || echo ""
+}
 
-echo ">> confirm SkipDisk is stamped on $N2 Spec.Props"
-stamped=$(kubectl get "resources.blockstor.cozystack.io/${RD}.${N2}" \
-    -o jsonpath='{.spec.props.DrbdOptions/SkipDisk}' 2>/dev/null || echo "")
-if [[ "$stamped" != "True" ]]; then
-    echo "FAIL (Bug 278 setup): SkipDisk stamp did not land (got '$stamped'); aborting" >&2
+echo ">> [Bug 278] 2-replica diskful RD on $N1+$N2, diskless replica on $N3"
+"${LCTL[@]}" resource-definition create "$RD" >/dev/null
+"${LCTL[@]}" volume-definition create "$RD" 128M >/dev/null
+"${LCTL[@]}" resource create "$N1" "$RD" --storage-pool=stand >/dev/null
+"${LCTL[@]}" resource create "$N2" "$RD" --storage-pool=stand >/dev/null
+"${LCTL[@]}" resource create "$N3" "$RD" --diskless >/dev/null
+
+echo ">> wait for both diskful UpToDate ($N1, $N2)"
+wait_uptodate "$RD" "$N1" "$N2"
+
+echo ">> wait for $N3 to settle Diskless"
+if ! wait_status_diskless "$RD" "$N3" 60; then
+    echo "FAIL (Bug 278 setup): $N3 never converged to Diskless; aborting" >&2
+    exit 1
+fi
+
+# ---- Part A: PERSISTENCE on a genuinely-Diskless replica -----------------
+#
+# HasDisklessVolume==true on $N3, so the auto-clear gate is closed: the
+# defensive stamp MUST survive there. This is the positive control that
+# proves the clearer is selective (it does not blindly wipe every stamp).
+echo ">> [Bug 278/A] stamp SkipDisk=True onto the DISKLESS $N3 (must persist)"
+stamp_skipdisk "$N3"
+
+echo ">> [Bug 278/A] confirm SkipDisk persists on Diskless $N3 (15s observation)"
+# Poll for a sustained 15s: the stamp must stay pinned, not flicker away.
+persist_ok=true
+for _ in $(seq 1 8); do
+    val=$(skipdisk_prop "$N3")
+    if [[ "$val" != "True" ]]; then
+        persist_ok=false
+        break
+    fi
+    sleep 2
+done
+if [[ "$persist_ok" != "true" ]]; then
+    echo "FAIL (Bug 278/A regression): SkipDisk was cleared on the Diskless $N3 — the clearer is over-eager (must only clear on a HEALTHY slot)" >&2
+    kubectl get "resources.blockstor.cozystack.io/${RD}.${N3}" \
+        -o json 2>/dev/null | jq '{props: .spec.props, status: .status}' >&2 || true
     exit 1
 fi
 
-echo ">> restart satellite pod on $N2 (simulates Talos kernel upgrade reattach)"
+# ---- Part B: AUTO-CLEAR on a HEALTHY replica after reattach --------------
+#
+# Stamp the same defensive prop onto the HEALTHY $N2. Restart its
+# satellite pod (the documented Talos kernel-upgrade reattach shape),
+# then assert the satellite auto-clears the observer-owned stamp and the
+# disk stays UpToDate. We poll for ABSENCE — the steady state the
+# product converges to — rather than the transient stamped state (which
+# the clearer correctly wipes on every reconcile of a healthy slot, and
+# which the previous revision wrongly tried to read back as a precondition).
+echo ">> [Bug 278/B] stamp SkipDisk=True onto the HEALTHY $N2 (simulates pre-upgrade defensive stamp)"
+stamp_skipdisk "$N2"
+
+echo ">> [Bug 278/B] restart satellite pod on $N2 (simulates Talos kernel upgrade reattach)"
 sat_pod=$(kubectl -n "$NS" get pods -l app=blockstor-satellite \
     --field-selector "spec.nodeName=$N2" \
     -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
@@ -119,6 +177,7 @@ fi
 kubectl -n "$NS" delete pod "$sat_pod" --wait=true >/dev/null
 
 echo ">> wait for satellite back up on $N2"
+new_pod=""
 for _ in $(seq 1 60); do
     new_pod=$(kubectl -n "$NS" get pods -l app=blockstor-satellite \
         --field-selector "spec.nodeName=$N2,status.phase=Running" \
@@ -133,33 +192,31 @@ if [[ -z "$new_pod" || "$new_pod" == "$sat_pod" ]]; then
     exit 1
 fi
 
-echo ">> wait up to 60s for the satellite to auto-clear SkipDisk on $N2"
+echo ">> [Bug 278/B] wait up to 60s for the satellite to auto-clear SkipDisk on $N2"
 # Poll Resource.Spec.Props.DrbdOptions/SkipDisk — Bug 278 contract:
 # the satellite reconciler probes kernel state, sees healthy
-# (disk:UpToDate, backing_dev set), and releases the observer's SSA
-# claim on the SkipDisk key. After SSA release the apiserver removes
-# the key from Spec.Props (no other owner claims it).
+# (HasDisklessVolume==false), and releases the observer's SSA claim on
+# the SkipDisk key. After SSA release the apiserver removes the key from
+# Spec.Props (no other owner claims it).
 cleared=false
 for _ in $(seq 1 30); do
-    val=$(kubectl get "resources.blockstor.cozystack.io/${RD}.${N2}" \
-        -o jsonpath='{.spec.props.DrbdOptions/SkipDisk}' 2>/dev/null || echo "")
-    if [[ -z "$val" ]]; then
+    if [[ -z "$(skipdisk_prop "$N2")" ]]; then
         cleared=true
         break
     fi
     sleep 2
 done
 
 if [[ "$cleared" != "true" ]]; then
-    echo "FAIL (Bug 278): SkipDisk did NOT auto-clear on $N2 within 60s after satellite restart" >&2
+    echo "FAIL (Bug 278): SkipDisk did NOT auto-clear on the HEALTHY $N2 within 60s after satellite restart" >&2
     kubectl get "resources.blockstor.cozystack.io/${RD}.${N2}" \
         -o json 2>/dev/null | jq '{props: .spec.props, status: .status}' >&2 || true
     exit 1
 fi
 
-echo ">> confirm $N2 disk state is back to UpToDate (re-attached after clear)"
+echo ">> [Bug 278/B] confirm $N2 disk state is UpToDate (stayed attached after clear)"
 if ! wait_status_state "$RD" "$N2" "UpToDate" 60 0; then
-    echo "FAIL (Bug 278 deep): SkipDisk cleared but $N2 did not re-attach to UpToDate within 60s" >&2
+    echo "FAIL (Bug 278 deep): SkipDisk cleared but $N2 did not hold UpToDate within 60s" >&2
     kubectl get "resources.blockstor.cozystack.io/${RD}.${N2}" \
         -o json 2>/dev/null | jq '{props: .spec.props, status: .status}' >&2 || true
     exit 1
@@ -175,4 +232,4 @@ if [[ "$n1_disk" != "UpToDate" ]]; then
     exit 1
 fi
 
-echo ">> bug-278-skipdisk-autoclear-after-reattach OK (auto-clear fires on healthy reattach, $N2 back to UpToDate)"
+echo ">> bug-278-skipdisk-autoclear-after-reattach OK (A: stamp persists on Diskless $N3; B: auto-clear fires on healthy $N2 reattach, $N2 stays UpToDate)"