Skip to content

Commit 39ed482

Browse files
authored
Namespace and PVC probes (#2535)
Adds readiness probing via CEL for namespaces and PVCs, to prevent subsequent phases from installing until their readiness checks have passed. Also adds e2e coverage via direct CER creation. Increased e2e timeout to 15m for experimental target. Signed-off-by: Daniel Franz <dfranz@redhat.com>
1 parent dee8ed5 commit 39ed482

7 files changed

Lines changed: 225 additions & 29 deletions

File tree

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,10 @@ verify-crd-compatibility: $(CRD_DIFF) manifests
243243
.PHONY: test
244244
test: manifests generate fmt lint test-unit test-e2e test-regression #HELP Run all tests.
245245

246+
E2E_TIMEOUT ?= 10m
246247
.PHONY: e2e
247248
e2e: #EXHELP Run the e2e tests.
248-
go test -count=1 -v ./test/e2e/features_test.go
249+
go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT)
249250

250251
E2E_REGISTRY_NAME := docker-registry
251252
E2E_REGISTRY_NAMESPACE := operator-controller-e2e
@@ -317,6 +318,7 @@ test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover
317318
test-experimental-e2e: COVERAGE_NAME := experimental-e2e
318319
test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
319320
test-experimental-e2e: PROMETHEUS_VALUES := helm/prom_experimental.yaml
321+
test-experimental-e2e: E2E_TIMEOUT := 15m
320322
test-experimental-e2e: run-internal image-registry prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster
321323

322324
.PHONY: prometheus

internal/operator-controller/controllers/clusterextensionrevision_controller.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"errors"
99
"fmt"
1010
"strings"
11+
"sync"
1112
"time"
1213

1314
appsv1 "k8s.io/api/apps/v1"
@@ -462,10 +463,13 @@ func (c *ClusterExtensionRevisionReconciler) toBoxcutterRevision(ctx context.Con
462463
previousObjs[i] = rev
463464
}
464465

466+
if err = initializeProbes(); err != nil {
467+
return nil, nil, err
468+
}
465469
opts := []boxcutter.RevisionReconcileOption{
466470
boxcutter.WithPreviousOwners(previousObjs),
467471
boxcutter.WithProbe(boxcutter.ProgressProbeType, probing.And{
468-
deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe,
472+
&namespaceActiveProbe, deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, &pvcBoundProbe,
469473
}),
470474
}
471475

@@ -511,6 +515,28 @@ func EffectiveCollisionProtection(cp ...ocv1.CollisionProtection) ocv1.Collision
511515
return ecp
512516
}
513517

518+
// initializeProbes is used to initialize CEL probes once, so we don't recreate them on every reconcile
519+
var initializeProbes = sync.OnceValue(func() error {
520+
nsCEL, err := probing.NewCELProbe(namespaceActiveCEL, `namespace phase must be "Active"`)
521+
if err != nil {
522+
return fmt.Errorf("initializing namespace CEL probe: %w", err)
523+
}
524+
pvcCEL, err := probing.NewCELProbe(pvcBoundCEL, `persistentvolumeclaim phase must be "Bound"`)
525+
if err != nil {
526+
return fmt.Errorf("initializing PVC CEL probe: %w", err)
527+
}
528+
namespaceActiveProbe = probing.GroupKindSelector{
529+
GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "Namespace"},
530+
Prober: nsCEL,
531+
}
532+
pvcBoundProbe = probing.GroupKindSelector{
533+
GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "PersistentVolumeClaim"},
534+
Prober: pvcCEL,
535+
}
536+
537+
return nil
538+
})
539+
514540
var (
515541
deploymentProbe = &probing.GroupKindSelector{
516542
GroupKind: schema.GroupKind{Group: appsv1.GroupName, Kind: "Deployment"},
@@ -542,6 +568,14 @@ var (
542568
},
543569
}
544570

571+
// namespaceActiveCEL is a CEL rule which asserts that the namespace is in "Active" phase
572+
namespaceActiveCEL = `self.status.phase == "Active"`
573+
namespaceActiveProbe probing.GroupKindSelector
574+
575+
// pvcBoundCEL is a CEL rule which asserts that the PVC is in "Bound" phase
576+
pvcBoundCEL = `self.status.phase == "Bound"`
577+
pvcBoundProbe probing.GroupKindSelector
578+
545579
// deplStaefulSetProbe probes Deployment, StatefulSet objects.
546580
deplStatefulSetProbe = &probing.ObservedGenerationProbe{
547581
Prober: probing.And{

test/e2e/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ Leverage existing steps for common operations:
199199
Use these variables in YAML templates:
200200

201201
- `${NAME}`: Scenario-specific ClusterExtension name (e.g., `ce-123`)
202+
- `${CER_NAME}`: Scenario-specific ClusterExtensionRevision name (e.g., `cer-123`; for applying CERs directly)
202203
- `${TEST_NAMESPACE}`: Scenario-specific namespace (e.g., `ns-123`)
203204
- `${CATALOG_IMG}`: Catalog image reference (defaults to in-cluster registry, overridable via `CATALOG_IMG` env var)
204205

test/e2e/features/revision.feature

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
@BoxcutterRuntime
2+
Feature: Install ClusterExtensionRevision
3+
4+
As an OLM user I would like to install a cluster extension revision directly, without using the cluster extension API.
5+
6+
Background:
7+
Given OLM is available
8+
And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}
9+
10+
Scenario: Probe failure for PersistentVolumeClaim halts phase progression
11+
When ClusterExtensionRevision is applied
12+
"""
13+
apiVersion: olm.operatorframework.io/v1
14+
kind: ClusterExtensionRevision
15+
metadata:
16+
annotations:
17+
olm.operatorframework.io/service-account-name: olm-sa
18+
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
19+
name: ${CER_NAME}
20+
spec:
21+
lifecycleState: Active
22+
collisionProtection: Prevent
23+
phases:
24+
- name: pvc
25+
objects:
26+
- object:
27+
apiVersion: v1
28+
kind: PersistentVolumeClaim
29+
metadata:
30+
name: test-pvc
31+
namespace: ${TEST_NAMESPACE}
32+
spec:
33+
accessModes:
34+
- ReadWriteOnce
35+
storageClassName: ""
36+
volumeName: test-pv
37+
resources:
38+
requests:
39+
storage: 1Mi
40+
- name: configmap
41+
objects:
42+
- object:
43+
apiVersion: v1
44+
kind: ConfigMap
45+
metadata:
46+
name: test-configmap
47+
namespace: ${TEST_NAMESPACE}
48+
data:
49+
name: test-configmap
50+
version: v1.2.0
51+
revision: 1
52+
"""
53+
54+
Then resource "persistentvolumeclaim/test-pvc" is installed
55+
And ClusterExtensionRevision "${CER_NAME}" reports Available as False with Reason ProbeFailure and Message:
56+
"""
57+
Object PersistentVolumeClaim.v1 ${TEST_NAMESPACE}/test-pvc: persistentvolumeclaim phase must be "Bound"
58+
"""
59+
And resource "configmap/test-configmap" is not installed
60+
61+
Scenario: Phases progress when PersistentVolumeClaim becomes "Bound"
62+
When ClusterExtensionRevision is applied
63+
"""
64+
apiVersion: olm.operatorframework.io/v1
65+
kind: ClusterExtensionRevision
66+
metadata:
67+
annotations:
68+
olm.operatorframework.io/service-account-name: olm-sa
69+
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
70+
name: ${CER_NAME}
71+
spec:
72+
lifecycleState: Active
73+
collisionProtection: Prevent
74+
phases:
75+
- name: pvc
76+
objects:
77+
- object:
78+
apiVersion: v1
79+
kind: PersistentVolumeClaim
80+
metadata:
81+
name: test-pvc
82+
namespace: ${TEST_NAMESPACE}
83+
spec:
84+
accessModes:
85+
- ReadWriteOnce
86+
storageClassName: ""
87+
volumeName: test-pv
88+
resources:
89+
requests:
90+
storage: 1Mi
91+
- object:
92+
apiVersion: v1
93+
kind: PersistentVolume
94+
metadata:
95+
name: test-pv
96+
spec:
97+
accessModes:
98+
- ReadWriteOnce
99+
capacity:
100+
storage: 1Mi
101+
claimRef:
102+
apiVersion: v1
103+
kind: PersistentVolumeClaim
104+
name: test-pvc
105+
namespace: ${TEST_NAMESPACE}
106+
persistentVolumeReclaimPolicy: Delete
107+
storageClassName: ""
108+
volumeMode: Filesystem
109+
local:
110+
path: /tmp/persistent-volume
111+
nodeAffinity:
112+
required:
113+
nodeSelectorTerms:
114+
- matchExpressions:
115+
- key: kubernetes.io/hostname
116+
operator: NotIn
117+
values:
118+
- a-node-name-that-should-not-exist
119+
- name: configmap
120+
objects:
121+
- object:
122+
apiVersion: v1
123+
kind: ConfigMap
124+
metadata:
125+
name: test-configmap
126+
namespace: ${TEST_NAMESPACE}
127+
data:
128+
name: test-configmap
129+
version: v1.2.0
130+
revision: 1
131+
"""
132+
133+
Then ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded
134+
And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded
135+
And resource "persistentvolume/test-pv" is installed
136+
And resource "persistentvolumeclaim/test-pvc" is installed
137+
And resource "configmap/test-configmap" is installed

test/e2e/steps/hooks.go

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@ type resource struct {
2727
}
2828

2929
type scenarioContext struct {
30-
id string
31-
namespace string
32-
clusterExtensionName string
33-
removedResources []unstructured.Unstructured
34-
backGroundCmds []*exec.Cmd
35-
metricsResponse map[string]string
30+
id string
31+
namespace string
32+
clusterExtensionName string
33+
clusterExtensionRevisionName string
34+
removedResources []unstructured.Unstructured
35+
backGroundCmds []*exec.Cmd
36+
metricsResponse map[string]string
3637

3738
extensionObjects []client.Object
3839
}
@@ -142,9 +143,10 @@ func CheckFeatureTags(ctx context.Context, sc *godog.Scenario) (context.Context,
142143

143144
func CreateScenarioContext(ctx context.Context, sc *godog.Scenario) (context.Context, error) {
144145
scCtx := &scenarioContext{
145-
id: sc.Id,
146-
namespace: fmt.Sprintf("ns-%s", sc.Id),
147-
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
146+
id: sc.Id,
147+
namespace: fmt.Sprintf("ns-%s", sc.Id),
148+
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
149+
clusterExtensionRevisionName: fmt.Sprintf("cer-%s", sc.Id),
148150
}
149151
return context.WithValue(ctx, scenarioContextKey, scCtx), nil
150152
}
@@ -176,13 +178,16 @@ func ScenarioCleanup(ctx context.Context, _ *godog.Scenario, err error) (context
176178
if sc.clusterExtensionName != "" {
177179
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionName, kind: "clusterextension"})
178180
}
181+
if sc.clusterExtensionRevisionName != "" {
182+
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionRevisionName, kind: "clusterextensionrevision"})
183+
}
179184
forDeletion = append(forDeletion, resource{name: sc.namespace, kind: "namespace"})
180-
go func() {
181-
for _, r := range forDeletion {
182-
if _, err := k8sClient("delete", r.kind, r.name, "--ignore-not-found=true"); err != nil {
183-
logger.Info("Error deleting resource", "name", r.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
185+
for _, r := range forDeletion {
186+
go func(res resource) {
187+
if _, err := k8sClient("delete", res.kind, res.name, "--ignore-not-found=true"); err != nil {
188+
logger.Info("Error deleting resource", "name", res.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
184189
}
185-
}
186-
}()
190+
}(r)
191+
}
187192
return ctx, nil
188193
}

test/e2e/steps/steps.go

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ func RegisterSteps(sc *godog.ScenarioContext) {
7171
sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+)$`, ClusterExtensionReportsConditionWithoutMsg)
7272
sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+)$`, ClusterExtensionReportsConditionWithoutReason)
7373
sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+)$`, ClusterExtensionRevisionReportsConditionWithoutMsg)
74+
sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+) and Message:$`, ClusterExtensionRevisionReportsConditionWithMsg)
7475
sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) transition between (\d+) and (\d+) minutes since its creation$`, ClusterExtensionReportsConditionTransitionTime)
76+
sc.Step(`^(?i)ClusterExtensionRevision is applied(?:\s+.*)?$`, ResourceIsApplied)
7577
sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" is archived$`, ClusterExtensionRevisionIsArchived)
7678
sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" contains annotation "([^"]+)" with value$`, ClusterExtensionRevisionHasAnnotationWithValue)
7779
sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" has label "([^"]+)" with value "([^"]+)"$`, ClusterExtensionRevisionHasLabelWithValue)
@@ -80,7 +82,7 @@ func RegisterSteps(sc *godog.ScenarioContext) {
8082
sc.Step(`^(?i)resource "([^"]+)" is installed$`, ResourceAvailable)
8183
sc.Step(`^(?i)resource "([^"]+)" is available$`, ResourceAvailable)
8284
sc.Step(`^(?i)resource "([^"]+)" is removed$`, ResourceRemoved)
83-
sc.Step(`^(?i)resource "([^"]+)" is eventually not found$`, ResourceEventuallyNotFound)
85+
sc.Step(`^(?i)resource "([^"]+)" is (?:eventually not found|not installed)$`, ResourceEventuallyNotFound)
8486
sc.Step(`^(?i)resource "([^"]+)" exists$`, ResourceAvailable)
8587
sc.Step(`^(?i)resource is applied$`, ResourceIsApplied)
8688
sc.Step(`^(?i)resource "deployment/test-operator" reports as (not ready|ready)$`, MarkTestOperatorNotReady)
@@ -186,6 +188,7 @@ func substituteScenarioVars(content string, sc *scenarioContext) string {
186188
vars := map[string]string{
187189
"TEST_NAMESPACE": sc.namespace,
188190
"NAME": sc.clusterExtensionName,
191+
"CER_NAME": sc.clusterExtensionRevisionName,
189192
"CATALOG_IMG": "docker-registry.operator-controller-e2e.svc.cluster.local:5000/e2e/test-catalog:v1",
190193
}
191194
if v, found := os.LookupEnv("CATALOG_IMG"); found {
@@ -235,8 +238,8 @@ func ClusterExtensionVersionUpdate(ctx context.Context, version string) error {
235238
return err
236239
}
237240

238-
// ResourceIsApplied applies the provided YAML resource to the cluster and in case of ClusterExtension it captures its name in the test context
239-
// so that it can be referred to in later steps with ${NAME}
241+
// ResourceIsApplied applies the provided YAML resource to the cluster and in case of ClusterExtension or ClusterExtensionRevision it captures
242+
// its name in the test context so that it can be referred to in later steps with ${NAME} or ${CER_NAME}, respectively
240243
func ResourceIsApplied(ctx context.Context, yamlTemplate *godog.DocString) error {
241244
sc := scenarioCtx(ctx)
242245
yamlContent := substituteScenarioVars(yamlTemplate.Content, sc)
@@ -246,10 +249,12 @@ func ResourceIsApplied(ctx context.Context, yamlTemplate *godog.DocString) error
246249
}
247250
out, err := k8scliWithInput(yamlContent, "apply", "-f", "-")
248251
if err != nil {
249-
return fmt.Errorf("failed to apply resource %v %w", out, err)
252+
return fmt.Errorf("failed to apply resource %v; err: %w; stderr: %s", out, err, stderrOutput(err))
250253
}
251254
if res.GetKind() == "ClusterExtension" {
252255
sc.clusterExtensionName = res.GetName()
256+
} else if res.GetKind() == "ClusterExtensionRevision" {
257+
sc.clusterExtensionRevisionName = res.GetName()
253258
}
254259
return nil
255260
}
@@ -378,6 +383,17 @@ type msgMatchFn func(string) bool
378383

379384
func alwaysMatch(_ string) bool { return true }
380385

386+
func messageComparison(ctx context.Context, msg *godog.DocString) msgMatchFn {
387+
msgCmp := alwaysMatch
388+
if msg != nil {
389+
expectedMsg := substituteScenarioVars(strings.Join(strings.Fields(msg.Content), " "), scenarioCtx(ctx))
390+
msgCmp = func(actual string) bool {
391+
return actual == expectedMsg
392+
}
393+
}
394+
return msgCmp
395+
}
396+
381397
func waitForCondition(ctx context.Context, resourceType, resourceName, conditionType, conditionStatus string, conditionReason *string, msgCmp msgMatchFn) error {
382398
require.Eventually(godog.T(ctx), func() bool {
383399
v, err := k8sClient("get", resourceType, resourceName, "-o", fmt.Sprintf("jsonpath={.status.conditions[?(@.type==\"%s\")]}", conditionType))
@@ -412,14 +428,7 @@ func waitForExtensionCondition(ctx context.Context, conditionType, conditionStat
412428
// ClusterExtensionReportsCondition waits for the ClusterExtension to have a condition matching the specified type,
413429
// status, reason, and exact message. Polls with timeout.
414430
func ClusterExtensionReportsCondition(ctx context.Context, conditionType, conditionStatus, conditionReason string, msg *godog.DocString) error {
415-
msgCmp := alwaysMatch
416-
if msg != nil {
417-
expectedMsg := substituteScenarioVars(strings.Join(strings.Fields(msg.Content), " "), scenarioCtx(ctx))
418-
msgCmp = func(actual string) bool {
419-
return actual == expectedMsg
420-
}
421-
}
422-
return waitForExtensionCondition(ctx, conditionType, conditionStatus, &conditionReason, msgCmp)
431+
return waitForExtensionCondition(ctx, conditionType, conditionStatus, &conditionReason, messageComparison(ctx, msg))
423432
}
424433

425434
// ClusterExtensionReportsConditionWithMessageFragment waits for the ClusterExtension to have a condition matching
@@ -512,6 +521,12 @@ func ClusterExtensionRevisionReportsConditionWithoutMsg(ctx context.Context, rev
512521
return waitForCondition(ctx, "clusterextensionrevision", substituteScenarioVars(revisionName, scenarioCtx(ctx)), conditionType, conditionStatus, &conditionReason, nil)
513522
}
514523

524+
// ClusterExtensionRevisionReportsConditionWithMsg waits for the named ClusterExtensionRevision to have a condition
525+
// matching type, status, reason, and message. Polls with timeout.
526+
func ClusterExtensionRevisionReportsConditionWithMsg(ctx context.Context, revisionName, conditionType, conditionStatus, conditionReason string, msg *godog.DocString) error {
527+
return waitForCondition(ctx, "clusterextensionrevision", substituteScenarioVars(revisionName, scenarioCtx(ctx)), conditionType, conditionStatus, &conditionReason, messageComparison(ctx, msg))
528+
}
529+
515530
// ClusterExtensionRevisionIsArchived waits for the named ClusterExtensionRevision to have Progressing=False
516531
// with reason Archived. Polls with timeout.
517532
func ClusterExtensionRevisionIsArchived(ctx context.Context, revisionName string) error {

test/e2e/steps/testdata/rbac-template.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ rules:
2828
- serviceaccounts
2929
- events
3030
- namespaces
31+
- persistentvolumes
32+
- persistentvolumeclaims
3133
verbs: [update, create, list, watch, get, delete, patch]
3234
- apiGroups: ["apps"]
3335
resources:

0 commit comments

Comments
 (0)