diff --git a/internal/controller/ensure_tiebreaker_autoplace_target_bug040_test.go b/internal/controller/ensure_tiebreaker_autoplace_target_bug040_test.go new file mode 100644 index 00000000..a175ccfc --- /dev/null +++ b/internal/controller/ensure_tiebreaker_autoplace_target_bug040_test.go @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: Apache-2.0 + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller_test + +import ( + "context" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" + controllerpkg "github.com/cozystack/blockstor/internal/controller" + apiv1 "github.com/cozystack/blockstor/pkg/api/v1" + "github.com/cozystack/blockstor/pkg/store" +) + +// BUG-040 (release-gate, stand dev sweep @73895806c) — the +// auto-tiebreaker witness landed on the one node the operator had +// explicitly drained with `linstor node set-property +// AutoplaceTarget false` (corner F4 maintenance-drain workflow). +// +// Stand repro (cli-matrix/n-autoplace-target-excludes): +// +// $ linstor n sp dev-worker-1 AutoplaceTarget false +// $ linstor rd c ccf-aptgt-1 && linstor vd c ccf-aptgt-1 256M +// $ linstor r c --auto-place=2 -s lvm-thin ccf-aptgt-1 +// → diskful on dev-worker-2 + dev-worker-3 (placer honours the prop), +// but ensureTiebreaker then stamped a [DISKLESS TIE_BREAKER] row on +// dev-worker-1 — controller log 2026-06-13T00:45:16Z, reconcileID +// 6874c482: "ensureTiebreaker … willCreate=true". +// +// pickTiebreakerNodeForRD only consulted the EVICTED/LOST flag set; +// the AutoplaceTarget prop — which the placer has honoured since +// corner F4 (#102) — was never read, so the witness (an automatic +// placement) violated the operator's drain. Upstream LINSTOR routes +// tiebreaker selection through the same autoplacer that honours +// AutoplaceTarget, so upstream never pins a witness to a drained node: +// with no other spare the witness is simply not created and quorum +// falls back to off (isQuorumFeasible: 2 diskful + 0 diskless → off). + +// TestBug040WitnessNeverLandsOnAutoplaceExcludedNode pins the create +// branch: 2 diskful + the only spare node carrying +// AutoplaceTarget=false. The witness MUST NOT be created anywhere and +// the quorum prop MUST resolve to off — not the phantom-witness +// majority the pre-fix applyWitnessDecision computed. +func TestBug040WitnessNeverLandsOnAutoplaceExcludedNode(t *testing.T) { + t.Parallel() + + scheme := newScheme(t) + st := store.NewInMemory() + ctx := context.Background() + + for _, n := range []string{"n1", "n2"} { + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: n, Type: apiv1.NodeTypeSatellite, + }); err != nil { + t.Fatalf("seed node %s: %v", n, err) + } + } + + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n3", Type: apiv1.NodeTypeSatellite, + Props: map[string]string{apiv1.PropAutoplaceTarget: "false"}, + }); err != nil { + t.Fatalf("seed n3 (AutoplaceTarget=false): %v", err) + } + + for _, n := range []string{"n1", "n2"} { + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug040", NodeName: n, + }); err != nil { + t.Fatalf("seed diskful %s: %v", n, err) + } + } + + rd := &blockstoriov1alpha1.ResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "pvc-bug040"}, + } + + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rd).Build() + + rec := &controllerpkg.ResourceDefinitionReconciler{ + Client: cli, + Scheme: scheme, + Store: st, + } + + if err := rec.EnsureTiebreaker(ctx, rd); err != nil { + t.Fatalf("EnsureTiebreaker: %v", err) + } + + all, err := st.Resources().ListByDefinition(ctx, "pvc-bug040") + if err != nil { + t.Fatalf("list: %v", err) + } + + // Invariant 1: nothing landed on the drained node. + if _, err := st.Resources().Get(ctx, "pvc-bug040", "n3"); err == nil { + t.Errorf("BUG-040: a replica landed on the AutoplaceTarget=false node n3; entries=%v", all) + } + + // Invariant 2: exactly the two diskful replicas — no witness was + // created anywhere (the drained node was the only spare). + diskful, diskless, witness := classifyReplicas(all) + if diskful != 2 || diskless != 0 || witness != 0 { + t.Errorf("BUG-040: composition diskful=%d diskless=%d witness=%d, want 2/0/0; entries=%v", + diskful, diskless, witness, all) + } + + // Invariant 3: with only 2 voters the quorum prop must be off — + // the pre-fix code appended a phantom witness to the quorum + // computation and stamped majority on a 2-voter RD. EnsureTiebreaker + // mutates the in-memory rd it was handed (setQuorum's optimistic + // loop), so assert on that. + if q := rd.Spec.Props["DrbdOptions/Resource/quorum"]; q != "off" { + t.Errorf("BUG-040: quorum prop got %q, want off (2 diskful, no witness possible)", q) + } +} + +// TestBug040WitnessLandsWhenAutoplaceTargetNotFalse pins the fail-open +// parse: true / garbage / unset values keep the spare node eligible, +// so the witness lands there exactly as before the fix. +func TestBug040WitnessLandsWhenAutoplaceTargetNotFalse(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + props map[string]string + }{ + {name: "explicit-true", props: map[string]string{apiv1.PropAutoplaceTarget: "true"}}, + {name: "garbage-value", props: map[string]string{apiv1.PropAutoplaceTarget: "nope"}}, + {name: "unset", props: nil}, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + scheme := newScheme(t) + st := store.NewInMemory() + ctx := context.Background() + + for _, n := range []string{"n1", "n2"} { + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: n, Type: apiv1.NodeTypeSatellite, + }); err != nil { + t.Fatalf("seed node %s: %v", n, err) + } + } + + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n3", Type: apiv1.NodeTypeSatellite, Props: tc.props, + }); err != nil { + t.Fatalf("seed n3: %v", err) + } + + for _, n := range []string{"n1", "n2"} { + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug040b", NodeName: n, + }); err != nil { + t.Fatalf("seed diskful %s: %v", n, err) + } + } + + rd := &blockstoriov1alpha1.ResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "pvc-bug040b"}, + } + + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rd).Build() + + rec := &controllerpkg.ResourceDefinitionReconciler{ + Client: cli, + Scheme: scheme, + Store: st, + } + + if err := rec.EnsureTiebreaker(ctx, rd); err != nil { + t.Fatalf("EnsureTiebreaker: %v", err) + } + + witnessRow, err := st.Resources().Get(ctx, "pvc-bug040b", "n3") + if err != nil { + t.Fatalf("witness must land on the eligible spare n3 (props=%v): %v", tc.props, err) + } + + diskful, _, witness := classifyReplicas([]apiv1.Resource{witnessRow}) + if diskful != 0 || witness != 1 { + t.Errorf("n3 row must be a TIE_BREAKER witness; flags=%v", witnessRow.Flags) + } + }) + } +} + +// TestBug040ExistingWitnessSurvivesAutoplaceTargetFlip pins the +// existing-replica half of the AutoplaceTarget contract: the prop +// excludes the node from NEW placements only — a witness that already +// lives on the node stays put when the operator later stamps +// AutoplaceTarget=false (same "no migration" semantic the placer +// documents for diskful replicas, and what upstream's drain workflow +// promises). +func TestBug040ExistingWitnessSurvivesAutoplaceTargetFlip(t *testing.T) { + t.Parallel() + + scheme := newScheme(t) + st := store.NewInMemory() + ctx := context.Background() + + for _, n := range []string{"n1", "n2"} { + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: n, Type: apiv1.NodeTypeSatellite, + }); err != nil { + t.Fatalf("seed node %s: %v", n, err) + } + } + + // The witness host: drained AFTER the witness landed. + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n3", Type: apiv1.NodeTypeSatellite, + Props: map[string]string{apiv1.PropAutoplaceTarget: "false"}, + }); err != nil { + t.Fatalf("seed n3: %v", err) + } + + for _, n := range []string{"n1", "n2"} { + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug040c", NodeName: n, + }); err != nil { + t.Fatalf("seed diskful %s: %v", n, err) + } + } + + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug040c", NodeName: "n3", + Flags: []string{apiv1.ResourceFlagDiskless, apiv1.ResourceFlagTieBreaker}, + }); err != nil { + t.Fatalf("seed existing witness on n3: %v", err) + } + + rd := &blockstoriov1alpha1.ResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "pvc-bug040c"}, + } + + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rd).Build() + + rec := &controllerpkg.ResourceDefinitionReconciler{ + Client: cli, + Scheme: scheme, + Store: st, + } + + if err := rec.EnsureTiebreaker(ctx, rd); err != nil { + t.Fatalf("EnsureTiebreaker: %v", err) + } + + witnessRow, err := st.Resources().Get(ctx, "pvc-bug040c", "n3") + if err != nil { + t.Fatalf("BUG-040: existing witness on n3 must survive the AutoplaceTarget flip: %v", err) + } + + _, _, witness := classifyReplicas([]apiv1.Resource{witnessRow}) + if witness != 1 { + t.Errorf("n3 row must still be a TIE_BREAKER witness; flags=%v", witnessRow.Flags) + } + + // Quorum stays majority: 2 diskful + the surviving witness. + if q := rd.Spec.Props["DrbdOptions/Resource/quorum"]; q != "majority" { + t.Errorf("quorum prop got %q, want majority (witness survived)", q) + } +} diff --git a/internal/controller/resourcedefinition_controller.go b/internal/controller/resourcedefinition_controller.go index 24ee89d8..fa41cf44 100644 --- a/internal/controller/resourcedefinition_controller.go +++ b/internal/controller/resourcedefinition_controller.go @@ -697,11 +697,22 @@ func (r *ResourceDefinitionReconciler) applyWitnessDecision( ) ([]apiv1.Resource, error) { switch { case wantWitness && len(witness) == 0: - err := r.createWitness(ctx, rd, replicas) + created, err := r.createWitness(ctx, rd, replicas) if err != nil { return nil, err } + if !created { + // No eligible spare node (every candidate is busy, drained, + // or AutoplaceTarget=false — BUG-040). The witness was NOT + // created, so the quorum computation below must not count a + // phantom third voter: 2 diskful + 0 diskless is quorum=off + // per upstream isQuorumFeasible. Pre-fix this branch appended + // the phantom unconditionally and stamped quorum=majority on + // a 2-voter RD, arming a both-halves-freeze on partition. + return diskless, nil + } + return append(diskless, apiv1.Resource{ Flags: []string{apiv1.ResourceFlagDiskless, apiv1.ResourceFlagTieBreaker}, }), nil @@ -768,7 +779,13 @@ func quorumPolicy(diskful, diskless int) string { // The probe only runs when APIReader is non-nil — unit-test setups // that construct the reconciler directly and rely on the // Bug 104/108 fake-client-only fixtures stay unaffected. -func (r *ResourceDefinitionReconciler) createWitness(ctx context.Context, rd *blockstoriov1alpha1.ResourceDefinition, existing []apiv1.Resource) error { +// +// The bool return reports whether a witness was actually created (or +// already existed under a concurrent reconcile). The no-candidate / +// deferred-probe paths return false so the caller's quorum +// computation reflects the real post-write replica set instead of a +// phantom witness (BUG-040). +func (r *ResourceDefinitionReconciler) createWitness(ctx context.Context, rd *blockstoriov1alpha1.ResourceDefinition, existing []apiv1.Resource) (bool, error) { hostingReplica := map[string]bool{} for i := range existing { hostingReplica[existing[i].NodeName] = true @@ -781,13 +798,13 @@ func (r *ResourceDefinitionReconciler) createWitness(ctx context.Context, rd *bl // in-depth against caller-snapshot staleness. tiebreakerNode, err := r.pickTiebreakerNodeForRD(ctx, rd.Name, hostingReplica) if err != nil { - return err + return false, err } if tiebreakerNode == "" { // No spare healthy node; the witness can't be created // today. Quorum will fall back to off below. - return nil + return false, nil } // Bug-024 placement guard: the candidate list above comes from @@ -803,14 +820,14 @@ func (r *ResourceDefinitionReconciler) createWitness(ctx context.Context, rd *bl // rdReconcileRequeue retries with a fresh candidate list. probe, err := r.probeNodeDirect(ctx, tiebreakerNode) if err != nil { - return err + return false, err } if !probe.found || probe.drained { logf.FromContext(ctx).Info("witness candidate vanished or drained; deferring witness create", "rd", rd.Name, "node", tiebreakerNode, "found", probe.found) - return nil + return false, nil } newWitness := apiv1.Resource{ @@ -821,7 +838,7 @@ func (r *ResourceDefinitionReconciler) createWitness(ctx context.Context, rd *bl err = r.Store.Resources().Create(ctx, &newWitness) if err != nil && !stderrors.Is(err, store.ErrAlreadyExists) && !alreadyExists(err) { - return err + return false, err } r.rollbackWitnessIfRDGone(ctx, rd.Name, tiebreakerNode) @@ -834,7 +851,7 @@ func (r *ResourceDefinitionReconciler) createWitness(ctx context.Context, rd *bl // repair leg (dropStrandedReplicas) on the next pass. r.rollbackWitnessIfNodeGone(ctx, rd.Name, tiebreakerNode) - return nil + return true, nil } // rollbackWitnessIfNodeGone deletes the just-created witness when its @@ -1239,6 +1256,13 @@ func (r *ResourceDefinitionReconciler) pickTiebreakerNodeForRD( continue } + // BUG-040: AutoplaceTarget=false is the maintenance-drain + // opt-out from autoplace targeting, and the auto-witness is + // an automatic placement — never pin it to a drained node. + if isAutoplaceExcludedNode(&nodes[i]) { + continue + } + if nodes[i].Type != "" && nodes[i].Type != apiv1.NodeTypeSatellite && nodes[i].Type != apiv1.NodeTypeCombined { continue } @@ -1267,6 +1291,30 @@ func isDisabledNode(node *apiv1.Node) bool { return false } +// isAutoplaceExcludedNode mirrors placer.autoplaceExcludedNodes for the +// RD-level tiebreaker path (BUG-040): a node whose `AutoplaceTarget` +// prop parses to an explicit false has opted out of autoplace TARGETING +// — and the auto-tiebreaker witness is an automatic placement, so the +// picker must never select it. Upstream LINSTOR routes tiebreaker +// selection through the same autoplacer that honours AutoplaceTarget, +// so a drained maintenance node never receives the witness either; on +// a topology with no other spare the witness is simply not created and +// quorum falls back to off. +// +// Only a parseable false excludes the node — true / unset / typo keep +// it eligible, the same fail-open parse the placer uses, so a +// fat-fingered prop can never silently exclude the node. +func isAutoplaceExcludedNode(node *apiv1.Node) bool { + raw, ok := node.Props[apiv1.PropAutoplaceTarget] + if !ok { + return false + } + + v, err := strconv.ParseBool(raw) + + return err == nil && !v +} + // dropStrandedReplicas reduces the RD's replica snapshot to the live // voting set and reaps stranded witnesses along the way. Two classes // of replica are excluded: @@ -1429,7 +1477,10 @@ func (r *ResourceDefinitionReconciler) probeNodeDirect(ctx context.Context, wire return nodeProbe{}, err } - return nodeProbe{found: true, drained: isDisabledNode(&node)}, nil + return nodeProbe{ + found: true, + drained: isDisabledNode(&node) || isAutoplaceExcludedNode(&node), + }, nil } var list blockstoriov1alpha1.NodeList @@ -1442,7 +1493,10 @@ func (r *ResourceDefinitionReconciler) probeNodeDirect(ctx context.Context, wire continue } - return nodeProbe{found: true, drained: nodeHasDrainFlag(&list.Items[i])}, nil + return nodeProbe{ + found: true, + drained: nodeHasDrainFlag(&list.Items[i]) || nodeCRDAutoplaceExcluded(&list.Items[i]), + }, nil } return nodeProbe{}, nil @@ -1751,6 +1805,24 @@ func nodeHasDrainFlag(n *blockstoriov1alpha1.Node) bool { return false } +// nodeCRDAutoplaceExcluded is the Node-CRD-shaped twin of +// isAutoplaceExcludedNode for the probeNodeDirect authoritative-reader +// leg (BUG-040): the per-node `AutoplaceTarget` prop lives verbatim in +// Spec.Props on the CRD, and a freshly-stamped false must veto the +// witness create the same way a freshly-stamped EVICTED flag does — +// the informer-backed candidate list can lag the REST set-property by +// tens of milliseconds. +func nodeCRDAutoplaceExcluded(n *blockstoriov1alpha1.Node) bool { + raw, ok := n.Spec.Props[apiv1.PropAutoplaceTarget] + if !ok { + return false + } + + v, err := strconv.ParseBool(raw) + + return err == nil && !v +} + // enqueueRDsForNode maps a Node flag-change event to every // ResourceDefinition in the cluster. The tiebreaker invariant is a // cluster-wide candidate-set decision (any RD's witness might have diff --git a/internal/controller/rg_rebalance_bug040_constraints_test.go b/internal/controller/rg_rebalance_bug040_constraints_test.go new file mode 100644 index 00000000..ce84301d --- /dev/null +++ b/internal/controller/rg_rebalance_bug040_constraints_test.go @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: Apache-2.0 + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller_test + +import ( + "context" + "testing" + + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + controllerpkg "github.com/cozystack/blockstor/internal/controller" + apiv1 "github.com/cozystack/blockstor/pkg/api/v1" + "github.com/cozystack/blockstor/pkg/store" +) + +// BUG-040 placement-constraint pins for the RGRebalanceReconciler's +// refill path. The refill rides the shared placer, which already +// enforces the constraints — these tests pin the WIRING so a future +// refactor that hands the placer a different filter (or bypasses it) +// trips a unit test instead of a stand sweep: +// +// 1. refill never lands a replica on an AutoplaceTarget=false node; +// 2. refill never exceeds the RG place_count (strictly additive, +// gap-fill only); +// 3. refill never lands a replica on an EVICTED node (and the evicted +// node's stranded replica does not mask the deficit — the gap is +// filled on a healthy spare). +// +// The kill-switch (`BalanceResourcesEnabled=false`) is pinned by +// TestRGRebalanceReconcilerHonoursBalanceResourcesDisabled and +// TestRebalanceHonoursDisabled alongside this file. + +// seedBug040RebalanceFixture is seedRebalanceFixture with per-node +// control over flags and props. 4 nodes (n1..n4) so the EVICTED case +// has both a forbidden node and a healthy spare. +func seedBug040RebalanceFixture( + t *testing.T, + ctx context.Context, + st store.Store, + placeCount int32, + nodeFlags map[string][]string, + nodeProps map[string]map[string]string, + existingReplicaNodes []string, +) { + t.Helper() + + for _, n := range []string{"n1", "n2", "n3", "n4"} { + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: n, + Type: apiv1.NodeTypeSatellite, + Flags: nodeFlags[n], + Props: nodeProps[n], + }); err != nil { + t.Fatalf("seed node %q: %v", n, err) + } + + if err := st.StoragePools().Create(ctx, &apiv1.StoragePool{ + StoragePoolName: "pool", + NodeName: n, + ProviderKind: apiv1.StoragePoolKindLVMThin, + }); err != nil { + t.Fatalf("seed pool %q: %v", n, err) + } + } + + if err := st.ResourceGroups().Create(ctx, &apiv1.ResourceGroup{ + Name: "rg", + SelectFilter: apiv1.AutoSelectFilter{ + PlaceCount: apiv1.LaxInt32(placeCount), + StoragePool: "pool", + }, + Annotations: map[string]string{ + apiv1.AnnotationRGRebalancePending: "2026-06-13T00:00:00Z", + }, + }); err != nil { + t.Fatalf("seed rg: %v", err) + } + + if err := st.ResourceDefinitions().Create(ctx, &apiv1.ResourceDefinition{ + Name: "pvc-bug040-rebalance", + ResourceGroupName: "rg", + }); err != nil { + t.Fatalf("seed rd: %v", err) + } + + for _, n := range existingReplicaNodes { + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug040-rebalance", NodeName: n, + }); err != nil { + t.Fatalf("seed existing replica on %q: %v", n, err) + } + } +} + +// runBug040Rebalance drives one annotation-armed reconcile and returns +// the post-pass replica set keyed by node. +func runBug040Rebalance(t *testing.T, ctx context.Context, st store.Store) map[string]apiv1.Resource { + t.Helper() + + scheme := newScheme(t) + cli := fake.NewClientBuilder().WithScheme(scheme).Build() + rec := &controllerpkg.RGRebalanceReconciler{Client: cli, Scheme: scheme, Store: st} + + if _, err := rec.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Name: "rg"}}); err != nil { + t.Fatalf("Reconcile: %v", err) + } + + got, err := st.Resources().ListByDefinition(ctx, "pvc-bug040-rebalance") + if err != nil { + t.Fatalf("list: %v", err) + } + + byNode := make(map[string]apiv1.Resource, len(got)) + for i := range got { + byNode[got[i].NodeName] = got[i] + } + + return byNode +} + +// TestRGRebalanceRefillSkipsAutoplaceExcludedNode: place_count=3 with +// replicas on n1+n2 and BOTH spares drained via AutoplaceTarget=false. +// The refill must leave the deficit unfilled rather than violate the +// operator's drain. +func TestRGRebalanceRefillSkipsAutoplaceExcludedNode(t *testing.T) { + t.Parallel() + + ctx := context.Background() + st := store.NewInMemory() + + drained := map[string]string{apiv1.PropAutoplaceTarget: "false"} + seedBug040RebalanceFixture(t, ctx, st, 3, + nil, + map[string]map[string]string{"n3": drained, "n4": drained}, + []string{"n1", "n2"}) + + byNode := runBug040Rebalance(t, ctx, st) + + for _, forbidden := range []string{"n3", "n4"} { + if _, hit := byNode[forbidden]; hit { + t.Errorf("BUG-040: rebalance refill landed a replica on AutoplaceTarget=false node %s; got %v", + forbidden, byNode) + } + } + + if len(byNode) != 2 { + t.Errorf("replica count after refill: got %d, want 2 (deficit must stay unfilled); got %v", + len(byNode), byNode) + } +} + +// TestRGRebalanceRefillNeverExceedsPlaceCount: an RD already AT the RG +// place_count must come through an annotation-armed pass untouched — +// the refill is gap-fill only, never an overshoot. +func TestRGRebalanceRefillNeverExceedsPlaceCount(t *testing.T) { + t.Parallel() + + ctx := context.Background() + st := store.NewInMemory() + + seedBug040RebalanceFixture(t, ctx, st, 2, nil, nil, []string{"n1", "n2"}) + + byNode := runBug040Rebalance(t, ctx, st) + + if len(byNode) != 2 { + t.Fatalf("BUG-040: refill overshot place_count=2: got %d replicas (%v)", len(byNode), byNode) + } + + for _, n := range []string{"n1", "n2"} { + if _, ok := byNode[n]; !ok { + t.Errorf("pre-existing replica on %s vanished during the no-op pass; got %v", n, byNode) + } + } +} + +// TestRGRebalanceRefillSkipsEvictedNode: place_count=3, replicas on +// n1+n2+n3 with n3 EVICTED. The stranded replica must not mask the +// deficit (placer accounting drops it) and the refill must land on the +// healthy spare n4 — never back on the EVICTED node. +func TestRGRebalanceRefillSkipsEvictedNode(t *testing.T) { + t.Parallel() + + ctx := context.Background() + st := store.NewInMemory() + + seedBug040RebalanceFixture(t, ctx, st, 3, + map[string][]string{"n3": {apiv1.NodeFlagEvicted}}, + nil, + []string{"n1", "n2", "n3"}) + + byNode := runBug040Rebalance(t, ctx, st) + + if _, ok := byNode["n4"]; !ok { + t.Errorf("BUG-040: deficit behind the EVICTED n3 was not refilled on the healthy spare n4; got %v", byNode) + } + + // The stranded replica on the EVICTED node is left for the + // eviction machinery to prune — the rebalance pass itself is + // additive and must not have created a SECOND row there. + if len(byNode) != 4 { + t.Errorf("replica set after refill: got %d rows (%v), want 4 (n1+n2+stranded n3+refilled n4)", + len(byNode), byNode) + } +} diff --git a/tests/e2e/cli-matrix/auto-diskful-evicted-node.sh b/tests/e2e/cli-matrix/auto-diskful-evicted-node.sh index 53eb695f..d68d8a50 100755 --- a/tests/e2e/cli-matrix/auto-diskful-evicted-node.sh +++ b/tests/e2e/cli-matrix/auto-diskful-evicted-node.sh @@ -2,37 +2,45 @@ # # usage: auto-diskful-evicted-node.sh WORK_DIR # -# L6 cli-matrix cell — Bug 390. +# L6 cli-matrix cell — Bug 390 (auto-diskful must ignore disabled-node +# replicas) / BUG-040 cell repair. # # The auto-diskful controller partitions an RD's replicas to decide -# whether place_count is satisfied. Pre-fix the partition was blind to -# node state: a diskful replica on an EVICTED/LOST node was still -# counted as a live diskful, so the deficit created by draining a node -# was masked — the timer never armed and the lost diskful was never -# replaced. Worse, promoteOne could re-stamp StorPoolName onto a -# replica whose node was being drained, re-creating diskful storage on -# a node the operator is trying to evict. +# whether the parent RG's place_count is satisfied. Pre-Bug-390 the +# partition was blind to node state: a diskful replica on an +# EVICTED/LOST node was still counted as a live diskful, so the deficit +# created by draining a node was masked — the timer never armed and the +# lost diskful was never replaced. Worse, promoteOne could promote a +# diskless replica sitting on a node the operator was draining. # -# Reproduction (operator-CLI level): +# BUG-040 triage note: the original revision of this cell asserted +# "refill to >= 3 diskful on healthy nodes" after evacuating one of 3 +# workers — topologically impossible (only 2 healthy workers remain) — +# and created the RD in DfltRscGrp, whose empty SelectFilter +# (place_count 0) makes the auto-diskful controller skip the RD +# entirely (`placeCountForRD` finds no target). The cell could never +# pass; the sweep exposed it. This revision pins the same product +# contract on an achievable 3-worker shape: # -# $ linstor rd c ; linstor vd c 128M -# $ linstor r c -s stand -# $ linstor r c -s stand -# $ linstor r c -s stand # 3 diskful, UpToDate -# $ linstor controller set-property DrbdOptions/auto-diskful 1 -# $ linstor node evacuate # n3 → EVICTED +# 1. rg c --place-count 2 + rd c --resource-group + vd c. +# 2. r c --diskless # user-diskless promotion candidate +# (created FIRST so no auto-tiebreaker witness ever spawns: +# a non-witness diskless already breaks the tie). +# 3. r c / r c diskful # place_count satisfied. +# 4. controller set-property DrbdOptions/auto-diskful 1 (minutes). +# 5. node evacuate # -> EVICTED, deficit of 1. # -# Expected (post-fix): the evicted node's diskful no longer counts -# toward place_count, so the controller refills a diskful replica on a -# HEALTHY node and the cluster reconverges to 3 diskful — and crucially -# NONE of the resulting diskful replicas sits on the evicted node. +# Expected (Bug 390 contract, outcome-level): the EVICTED node's +# diskful no longer counts toward place_count, so the cluster repairs +# the deficit by promoting the healthy spare's diskless replica to +# diskful (the auto-diskful timer and the evacuate add-before-drop +# migration both converge on the same outcome — the spare is the only +# eligible candidate). End state: 2 healthy diskful (d1 + spare), and +# crucially NO diskful replica on the evacuated node. # -# Pre-fix: the cluster stayed "satisfied" at 2 live + 1 evicted diskful -# (the evicted one masked the deficit), or a replacement was stamped +# Pre-fix: the cluster stayed "satisfied" at 1 live + 1 evicted diskful +# (the evicted one masked the deficit), or the replacement was stamped # back onto the evicted node. -# -# Contract: after evacuate + convergence, assert there are >= 3 diskful -# replicas AND the evicted node hosts no diskful replica. set -euo pipefail @@ -47,73 +55,83 @@ require_workers 3 linstor_cli_setup +RG=apg-390 RD=cli-matrix-390 POOL=${POOL:-stand} -N1=$WORKER_1 -N2=$WORKER_2 -N3=$WORKER_3 +D1=$WORKER_1 +D2=$WORKER_2 +SPARE=$WORKER_3 cleanup() { - # Best-effort restore of the evicted node so the stand is reusable - # by later cells. `node evacuate --restore` (or re-create) is - # idempotent; ignore failures. - "${LCTL[@]}" node evacuate --restore "$N3" >/dev/null 2>&1 || true + # Best-effort restore of the evacuated node so the stand is + # reusable by later cells; disarm the controller-scope timer. + "${LCTL[@]}" node restore "$D2" >/dev/null 2>&1 || true "${LCTL[@]}" controller set-property DrbdOptions/auto-diskful "" >/dev/null 2>&1 || true delete_rd "$RD" assert_no_orphans "$RD" + "${LCTL[@]}" resource-group delete "$RG" >/dev/null 2>&1 || true linstor_cli_teardown } trap cleanup EXIT -echo ">> [Bug 390] 3-replica diskful RD on $N1+$N2+$N3" -"${LCTL[@]}" resource-definition create "$RD" >/dev/null +echo ">> [Bug 390] rg c $RG --place-count 2 + rd c $RD" +"${LCTL[@]}" resource-group create "$RG" --place-count 2 --storage-pool="$POOL" >/dev/null +"${LCTL[@]}" resource-definition create "$RD" --resource-group "$RG" >/dev/null "${LCTL[@]}" volume-definition create "$RD" 128M >/dev/null -"${LCTL[@]}" resource create "$N1" "$RD" --storage-pool="$POOL" >/dev/null -"${LCTL[@]}" resource create "$N2" "$RD" --storage-pool="$POOL" >/dev/null -"${LCTL[@]}" resource create "$N3" "$RD" --storage-pool="$POOL" >/dev/null -echo ">> wait for all three diskful UpToDate" -# wait_uptodate asserts a pair at a time; chain the two pairs that -# cover all three replicas (n1↔n2 and n2↔n3). -wait_uptodate "$RD" "$N1" "$N2" -wait_uptodate "$RD" "$N2" "$N3" +# The promotion candidate FIRST: a user diskless on the spare. Created +# before any diskful so the auto-tiebreaker never wants a witness +# (non-witness diskless count >= 1 breaks the tie by itself) and the +# candidate carries no TIE_BREAKER flag — promoteOne refuses witnesses. +echo ">> [Bug 390] r c $SPARE $RD --diskless (promotion candidate)" +"${LCTL[@]}" resource create "$SPARE" "$RD" --diskless >/dev/null + +echo ">> [Bug 390] diskful pair on $D1 + $D2" +"${LCTL[@]}" resource create "$D1" "$RD" --storage-pool="$POOL" >/dev/null +"${LCTL[@]}" resource create "$D2" "$RD" --storage-pool="$POOL" >/dev/null + +echo ">> wait for both diskful UpToDate" +wait_uptodate "$RD" "$D1" "$D2" echo ">> arm auto-diskful (1 minute) at controller scope" "${LCTL[@]}" controller set-property DrbdOptions/auto-diskful 1 >/dev/null -echo ">> linstor node evacuate $N3 (Bug 390 trigger — drains/evicts the node)" +echo ">> linstor node evacuate $D2 (Bug 390 trigger — drains/evicts the node)" err_file=$(mktemp) -if ! "${LCTL[@]}" node evacuate "$N3" 2>"$err_file"; then +if ! "${LCTL[@]}" node evacuate "$D2" 2>"$err_file"; then rc=$? - echo "FAIL (Bug 390): node evacuate $N3 exited $rc" >&2 + echo "FAIL (Bug 390): node evacuate $D2 exited $rc" >&2 cat "$err_file" >&2 rm -f "$err_file" exit 1 fi rm -f "$err_file" -# Give the auto-diskful timer (1m) + the placer refill time to fire. -# 180s budget covers the 1-minute deadline plus replacement create + -# initial sync to UpToDate. -echo ">> wait up to 180s for the cluster to refill to 3 diskful on healthy nodes" -deadline=$(( $(date +%s) + 180 )) +# Budget: the 1-minute auto-diskful deadline + promotion + sync. The +# evacuate add-before-drop migration may repair the deficit earlier; +# either path must converge on the same end state. +echo ">> wait up to 240s for the deficit repair: $SPARE promoted diskful, no diskful on $D2" +deadline=$(( $(date +%s) + 240 )) ok=0 while (( $(date +%s) < deadline )); do - # Diskful nodes excluding the evicted one. mapfile -t diskful < <(linstor_diskful_nodes "$RD") - healthy_diskful=0 + spare_diskful=0 evicted_diskful=0 + healthy_diskful=0 for n in "${diskful[@]}"; do [[ -z "$n" ]] && continue - if [[ "$n" == "$N3" ]]; then + if [[ "$n" == "$D2" ]]; then evicted_diskful=1 else healthy_diskful=$(( healthy_diskful + 1 )) fi + if [[ "$n" == "$SPARE" ]]; then + spare_diskful=1 + fi done - if (( healthy_diskful >= 3 && evicted_diskful == 0 )); then + if (( spare_diskful == 1 && healthy_diskful >= 2 && evicted_diskful == 0 )); then ok=1 break fi @@ -121,19 +139,21 @@ while (( $(date +%s) < deadline )); do done if (( ok != 1 )); then - echo "FAIL (Bug 390 regression): cluster did not refill to 3 diskful on healthy nodes" >&2 - echo " evicted node: $N3" >&2 - echo " diskful nodes: $(linstor_diskful_nodes "$RD" | tr '\n' ' ')" >&2 + echo "FAIL (Bug 390 regression): deficit behind the EVICTED node was not repaired" >&2 + echo " evacuated node: $D2" >&2 + echo " diskful nodes: $(linstor_diskful_nodes "$RD" | tr '\n' ' ')" >&2 kubectl get resources.blockstor.cozystack.io --no-headers 2>/dev/null \ | awk -v rd="${RD}." '$1 ~ "^"rd' >&2 || true exit 1 fi -# Defensive double-check: the evicted node must host NO diskful replica -# (promoteOne must never select / re-stamp a disabled node — Bug 390 #4). -if linstor_diskful_nodes "$RD" | grep -qx "$N3"; then - echo "FAIL (Bug 390 #4): a diskful replica was placed/kept on the EVICTED node $N3" >&2 +# Defensive double-check (Bug 390 #4): the surviving diskful pair is +# d1 + spare, and the original healthy diskful was never demoted. +flags=$(kubectl get "resources.blockstor.cozystack.io/${RD}.${D1}" \ + -o jsonpath='{.spec.flags}' 2>/dev/null || echo "__absent__") +if [[ "$flags" == "__absent__" || "$flags" == *"DISKLESS"* ]]; then + echo "FAIL (Bug 390): healthy diskful on $D1 was lost/demoted during the repair; flags=$flags" >&2 exit 1 fi -echo ">> auto-diskful-evicted-node OK (Bug 390 pinned: evicted diskful does not count; refill lands on a healthy node)" +echo ">> auto-diskful-evicted-node OK (Bug 390 pinned: evicted diskful does not count; repair promotes the healthy spare)" diff --git a/tests/e2e/cli-matrix/bug-278-skipdisk-autoclear-after-reattach.sh b/tests/e2e/cli-matrix/bug-278-skipdisk-autoclear-after-reattach.sh index 67cd67c6..7d64d589 100755 --- a/tests/e2e/cli-matrix/bug-278-skipdisk-autoclear-after-reattach.sh +++ b/tests/e2e/cli-matrix/bug-278-skipdisk-autoclear-after-reattach.sh @@ -70,12 +70,35 @@ echo ">> wait for both diskful UpToDate" RD="$RD" wait_uptodate "$RD" "$N1" "$N2" echo ">> stamp DrbdOptions/SkipDisk=True onto $N2 (simulates pre-upgrade defensive stamp)" -# Patch Resource.Spec.Props directly via kubectl — bypasses the -# linstor CLI's prop-set path so we mirror the observer's SSA write -# exactly. The Bug 278 fix gates auto-clear off "prop pinned AND -# kernel healthy", regardless of the prop's origin. -kubectl patch "resources.blockstor.cozystack.io/${RD}.${N2}" --type=merge -p \ - '{"spec":{"props":{"DrbdOptions/SkipDisk":"True"}}}' +# Server-side-apply under the OBSERVER's field manager +# (`blockstor-satellite-skipdisk`, pkg/satellite/controllers/observer.go) +# so the stamp carries the exact SSA ownership the real defensive write +# has. The Bug 278 auto-clear RELEASES that owner's claim — that is the +# whole product contract: the satellite un-stamps only its own +# defensive writes, while an operator-set SkipDisk (different field +# manager) survives the reattach. The previous revision of this cell +# stamped via `kubectl patch --type=merge` (field manager +# "kubectl-patch"), which the release path correctly refuses to clear — +# the cell was asserting the opposite of the product contract and could +# never pass (BUG-040 triage). +# +# The apply doc must carry the two +required immutable scalars +# (resourceDefinitionName / nodeName) — same SSA-validation rule the +# observer's writeSkipDiskProp follows. +# --force-conflicts mirrors the observer's ForceOwnership: the apply +# doc must claim the two required scalars even though the controller's +# field manager owns them. +kubectl apply --server-side --force-conflicts --field-manager=blockstor-satellite-skipdisk -f - <> confirm SkipDisk is stamped on $N2 Spec.Props" stamped=$(kubectl get "resources.blockstor.cozystack.io/${RD}.${N2}" \ diff --git a/tests/e2e/cli-matrix/lib.sh b/tests/e2e/cli-matrix/lib.sh index bce908e3..4383a10c 100755 --- a/tests/e2e/cli-matrix/lib.sh +++ b/tests/e2e/cli-matrix/lib.sh @@ -322,6 +322,29 @@ wait_replica_count() { return 1 } +# wait_diskful_count [timeout=60] — poll until exactly +# DISKFUL replicas exist (DISKLESS / TIE_BREAKER rows are +# ignored). Use this — not wait_replica_count — after `r c --auto-place` +# of an even count: the auto-quorum machinery legitimately adds a +# diskless TIE_BREAKER witness on a spare node within seconds, so the +# total-row count overshoots the requested place count and the +# total-count wait flakes on the witness race (BUG-040 sweep signature: +# "never reached count=2 (last=3)"). +wait_diskful_count() { + local rd=$1 expected=$2 timeout=${3:-60} + local deadline=$(( $(date +%s) + timeout )) + local cur=0 + while (( $(date +%s) < deadline )); do + cur=$(linstor_diskful_count "$rd") + if [[ "$cur" == "$expected" ]]; then + return 0 + fi + sleep 2 + done + echo "wait_diskful_count: ${rd} never reached diskful=${expected} (last=${cur}) within ${timeout}s" >&2 + return 1 +} + # wait_replica_absent [timeout=30] — poll until no Resource # CRD exists for (rd, node). Used after `linstor r d ` so the # next phase can act on a known-clean shape. diff --git a/tests/e2e/cli-matrix/multi-volume-late-vd-create.sh b/tests/e2e/cli-matrix/multi-volume-late-vd-create.sh index 28a5055b..5cea3ce8 100755 --- a/tests/e2e/cli-matrix/multi-volume-late-vd-create.sh +++ b/tests/e2e/cli-matrix/multi-volume-late-vd-create.sh @@ -69,8 +69,11 @@ echo ">> [Bug 332] rd c + vd c (vol-0)" echo ">> [Bug 332] r c --auto-place=3 -s $POOL" "${LCTL[@]}" resource create --auto-place=3 --storage-pool="$POOL" "$RD" >/dev/null -echo ">> wait up to 120s for vol-0 to reach UpToDate on all 3 replicas" -deadline=$(( $(date +%s) + 120 )) +# 240s: a 1G x3 initial sync shares the stand with the rest of the +# sweep; the previous 120s budget flaked under load (the failure dump +# printed all three replicas UpToDate moments after the deadline). +echo ">> wait up to 240s for vol-0 to reach UpToDate on all 3 replicas" +deadline=$(( $(date +%s) + 240 )) all_up=false while (( $(date +%s) < deadline )); do # Per-replica volume-0 state, three rows expected. The wire diff --git a/tests/e2e/cli-matrix/n-d-evicted-rejected.sh b/tests/e2e/cli-matrix/n-d-evicted-rejected.sh index bace635a..b4b6f0ca 100755 --- a/tests/e2e/cli-matrix/n-d-evicted-rejected.sh +++ b/tests/e2e/cli-matrix/n-d-evicted-rejected.sh @@ -62,7 +62,9 @@ echo ">> [F6] rd c + vd c + r c --auto-place=2 -s $POOL" "${LCTL[@]}" resource-definition create "$RD" >/dev/null "${LCTL[@]}" volume-definition create "$RD" 256M >/dev/null "${LCTL[@]}" resource create --auto-place=2 --storage-pool="$POOL" "$RD" >/dev/null -wait_replica_count "$RD" 2 90 +# Diskful count, not total rows: the auto-tiebreaker witness lands on +# the spare node within seconds and is a legal third row (BUG-040). +wait_diskful_count "$RD" 2 90 mapfile -t diskful < <(linstor_diskful_nodes "$RD") if (( ${#diskful[@]} < 2 )); then diff --git a/tests/e2e/cli-matrix/n-evacuate-prunes-source.sh b/tests/e2e/cli-matrix/n-evacuate-prunes-source.sh index 680aad1c..1a4645ef 100755 --- a/tests/e2e/cli-matrix/n-evacuate-prunes-source.sh +++ b/tests/e2e/cli-matrix/n-evacuate-prunes-source.sh @@ -74,7 +74,9 @@ echo ">> [Bug 389] rd c + vd c + r c --auto-place=2 -s $POOL" "${LCTL[@]}" resource create --auto-place=2 --storage-pool="$POOL" "$RD" >/dev/null echo ">> wait for 2 diskful replicas, both UpToDate" -wait_replica_count "$RD" 2 90 +# Diskful count, not total rows: the auto-tiebreaker witness lands on +# the spare node within seconds and is a legal third row (BUG-040). +wait_diskful_count "$RD" 2 90 mapfile -t diskful < <(linstor_diskful_nodes "$RD") if (( ${#diskful[@]} != 2 )); then die "[Bug 389] expected 2 diskful replicas after autoplace=2, got ${#diskful[@]}: ${diskful[*]}" @@ -88,9 +90,17 @@ EVAC_NODE="${diskful[0]}" KEEP_NODE="${diskful[1]}" echo ">> [Bug 389] evacuating $EVAC_NODE (keeping $KEEP_NODE)" -# The replacement must land on the one node that has the pool but no -# replica yet. -SPARE_NODE=$(linstor_pick_free_node "$RD" "$EVAC_NODE" "$KEEP_NODE") +# The replacement must land on the one node that holds no DISKFUL +# replica yet. NOTE: do not use linstor_pick_free_node here — the +# auto-tiebreaker witness legally occupies the spare node's Resource +# CRD slot (BUG-040), and the evacuation replacement promotes that +# witness in place rather than creating a fresh row. +SPARE_NODE="" +for n in "$WORKER_1" "$WORKER_2" "$WORKER_3"; do + [[ -z "$n" || "$n" == "$EVAC_NODE" || "$n" == "$KEEP_NODE" ]] && continue + SPARE_NODE="$n" + break +done if [[ -z "$SPARE_NODE" ]]; then echo "SKIP: no spare $POOL node free for the evacuation replacement" exit 0 @@ -99,18 +109,22 @@ echo " replacement expected on $SPARE_NODE" "${LCTL[@]}" node evacuate "$EVAC_NODE" >/dev/null -echo ">> [Bug 389] wait for replacement on $SPARE_NODE to reach UpToDate" -# The replacement Resource CRD must appear and converge before the -# source is allowed to drop (strict add-before-drop). +echo ">> [Bug 389] wait for replacement on $SPARE_NODE to become DISKFUL" +# The replacement must appear AS A DISKFUL replica (a fresh create, or +# an in-place promote of the tiebreaker witness already parked there) +# and converge before the source is allowed to drop (strict +# add-before-drop). deadline=$(( $(date +%s) + 120 )) +spare_diskful=false while (( $(date +%s) < deadline )); do - if kubectl get "resources.blockstor.cozystack.io/${RD}.${SPARE_NODE}" >/dev/null 2>&1; then + if linstor_diskful_nodes "$RD" | grep -qx "$SPARE_NODE"; then + spare_diskful=true break fi sleep 3 done -if ! kubectl get "resources.blockstor.cozystack.io/${RD}.${SPARE_NODE}" >/dev/null 2>&1; then - die "[Bug 389] evacuate never placed a replacement replica on $SPARE_NODE" +if [[ "$spare_diskful" != "true" ]]; then + die "[Bug 389] evacuate never placed a diskful replacement replica on $SPARE_NODE" fi wait_status_state "$RD" "$SPARE_NODE" "UpToDate|UpToDate\\(100%\\)" 240 diff --git a/tests/e2e/cli-matrix/n-evict-tiebreaker-no-shuffle.sh b/tests/e2e/cli-matrix/n-evict-tiebreaker-no-shuffle.sh index 9167a7a3..5a55a420 100755 --- a/tests/e2e/cli-matrix/n-evict-tiebreaker-no-shuffle.sh +++ b/tests/e2e/cli-matrix/n-evict-tiebreaker-no-shuffle.sh @@ -43,10 +43,12 @@ # TestBug385EvictTiebreakerNodeRelocatesWitnessToSpare). # # This L6 cell is the stand-side companion: it builds the 2r+tb shape on -# 3 workers, evicts the tiebreaker node via the real `linstor n e` CLI, -# and asserts that within 30s (a) the EVICTED node no longer hosts a -# witness, and (b) BOTH original diskful replicas are still diskful — no -# healthy UpToDate replica was demoted to TIE_BREAKER. +# 3 workers, drains the tiebreaker node into EVICTED via the real +# `linstor node evacuate` CLI (linstor-client has no `node evict` verb — +# the repro's `n e` reached EVICTED through the evacuate/auto-evict +# machinery), and asserts that within 30s (a) the EVICTED node no longer +# hosts a witness, and (b) BOTH original diskful replicas are still +# diskful — no healthy UpToDate replica was demoted to TIE_BREAKER. set -euo pipefail @@ -121,13 +123,22 @@ echo " diskful pair: $uptodate_pair tiebreaker: $tb_node" DISKFUL_A=$(echo "$uptodate_pair" | awk '{print $1}') DISKFUL_B=$(echo "$uptodate_pair" | awk '{print $2}') -# Evict the tiebreaker node — the exact operator action from the repro. -echo ">> linstor n e $tb_node (evict the tiebreaker node)" +# Drain the tiebreaker node into the EVICTED state. NOTE: linstor-client +# (1.27.1) has NO `node evict` verb — eviction is either automatic +# (auto-evict) or operator-driven via `node evacuate`, which stamps the +# same EVICTED flag this cell asserts on. The original `node evict` +# invocation died in the CLI's argparse (exit 2, no REST call) and the +# cell could never pass (BUG-040 triage). +echo ">> linstor n evacuate $tb_node (drain the tiebreaker node → EVICTED)" EVICTED_NODE=$tb_node -"${LCTL[@]}" node evict "$tb_node" >/dev/null 2>&1 || { - echo "FAIL: n e (node evict) exited non-zero" >&2 +err_file=$(mktemp) +if ! "${LCTL[@]}" node evacuate "$tb_node" >/dev/null 2>"$err_file"; then + echo "FAIL: n evacuate exited non-zero" >&2 + cat "$err_file" >&2 + rm -f "$err_file" exit 1 -} +fi +rm -f "$err_file" # The node must show EVICTED in `linstor n l` (evict took effect at the # node level). diff --git a/tests/e2e/cli-matrix/n-rst-recreates-tiebreaker.sh b/tests/e2e/cli-matrix/n-rst-recreates-tiebreaker.sh index b7281e01..8b51978f 100755 --- a/tests/e2e/cli-matrix/n-rst-recreates-tiebreaker.sh +++ b/tests/e2e/cli-matrix/n-rst-recreates-tiebreaker.sh @@ -176,11 +176,23 @@ fi # Belt-and-suspenders: the operator-visible `linstor r l` surface the # bug report cited must show exactly one TIE_BREAKER row plus the two -# diskful replicas. -wire=$(linstor_r_l_json "$RD") -n_tb=$(printf '%s' "$wire" \ - | jq -r '.[][] | select((.rsc_flags // []) | index("TIE_BREAKER")) | .name' 2>/dev/null \ - | wc -l | tr -d ' ' || echo 0) +# diskful replicas. Poll briefly: the CRD loop above sees the witness +# the instant it is created, but the apiserver's `r l` list view reads +# its informer cache, which can lag the write by a moment (BUG-040 +# sweep: the one-shot check here flaked with "0 TIE_BREAKER rows" while +# the witness CRD already existed). +deadline=$(( $(date +%s) + 30 )) +n_tb=0 +while (( $(date +%s) < deadline )); do + wire=$(linstor_r_l_json "$RD") + n_tb=$(printf '%s' "$wire" \ + | jq -r '.[][] | select((.rsc_flags // []) | index("TIE_BREAKER")) | .name' 2>/dev/null \ + | wc -l | tr -d ' ' || echo 0) + if [[ "$n_tb" == "1" ]]; then + break + fi + sleep 2 +done if [[ "$n_tb" != "1" ]]; then echo "FAIL (Bug 386): linstor r l shows $n_tb TIE_BREAKER rows for $RD, want 1" >&2 printf '%s\n' "$wire" | jq '.[][]| {name, node_name, flags: .rsc_flags}' 2>/dev/null >&2 || true diff --git a/tests/operator-harness/replay/n-autoplace-target-excludes.yaml b/tests/operator-harness/replay/n-autoplace-target-excludes.yaml index 906c7ce6..3c6d257d 100644 --- a/tests/operator-harness/replay/n-autoplace-target-excludes.yaml +++ b/tests/operator-harness/replay/n-autoplace-target-excludes.yaml @@ -54,13 +54,14 @@ steps: expect_exit: 0 await: # THE F4 ASSERTION: the AutoplaceTarget=false node never received - # a DISKFUL replica — both copies landed on node2 + node3. On a - # 3-worker stand the auto-tiebreaker witness legitimately lands on - # the only remaining node (node1), so a Resource CRD for node1 - # exists as a diskless TIE_BREAKER and resource_absent can never - # hold — assert "not diskful" instead (same lesson as the - # toggle-disk --migrate-from replay). - kind: replica_diskless + # ANY auto-placed replica — both copies landed on node2 + node3, + # and (BUG-040) the auto-tiebreaker witness is an automatic + # placement too, so it must not land on the drained node either. + # With node1 the only remaining spare, no witness is possible + # (quorum falls back to off) and node1 carries no Resource CRD at + # all. An earlier revision asserted replica_diskless here, which + # codified the pre-BUG-040 witness-on-drained-node violation. + kind: resource_absent rd: "{{rd}}" node: "{{node1}}" timeout_s: 30