Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 151 additions & 6 deletions test/library/encryption/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"k8s.io/client-go/util/retry"

configv1 "github.com/openshift/api/config/v1"
operatorv1 "github.com/openshift/api/operator/v1"
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"

"github.com/openshift/library-go/test/library"
Expand Down Expand Up @@ -56,13 +57,14 @@ type EncryptionKeyMeta struct {

type UpdateUnsupportedConfigFunc func(raw []byte) error

func SetAndWaitForEncryptionType(t testing.TB, provider configv1.APIServerEncryption, defaultTargetGRs []schema.GroupResource, namespace, labelSelector string) ClientSet {
t.Helper()
t.Logf("Starting encryption e2e test for %q mode", provider.Type)
// GetOperatorConditionsFuncType fetches operator conditions (e.g. for EncryptionMigrationControllerProgressing).
type GetOperatorConditionsFuncType func(t testing.TB) ([]operatorv1.OperatorCondition, error)

clientSet := GetClients(t)
lastMigratedKeyMeta, err := GetLastKeyMeta(t, clientSet.Kube, namespace, labelSelector)
require.NoError(t, err)
// ApplyAPIServerEncryptionType updates the cluster APIServer encryption spec to the desired provider without
// waiting for storage migration. Callers that need previous key metadata should call GetLastKeyMeta before this.
func ApplyAPIServerEncryptionType(t testing.TB, clientSet ClientSet, provider configv1.APIServerEncryption, _ []schema.GroupResource) {
t.Helper()
t.Logf("Applying APIServer encryption type %q (not waiting for migration)", provider.Type)

apiServer, err := clientSet.ApiServerConfig.Get(context.TODO(), "cluster", metav1.GetOptions{})
require.NoError(t, err)
Expand All @@ -75,6 +77,17 @@ func SetAndWaitForEncryptionType(t testing.TB, provider configv1.APIServerEncryp
} else {
t.Logf("APIServer is already configured to use %q mode", provider.Type)
}
}

func SetAndWaitForEncryptionType(t testing.TB, provider configv1.APIServerEncryption, defaultTargetGRs []schema.GroupResource, namespace, labelSelector string) ClientSet {
t.Helper()
t.Logf("Starting encryption e2e test for %q mode", provider.Type)

clientSet := GetClients(t)
lastMigratedKeyMeta, err := GetLastKeyMeta(t, clientSet.Kube, namespace, labelSelector)
require.NoError(t, err)

ApplyAPIServerEncryptionType(t, clientSet, provider, defaultTargetGRs)

WaitForEncryptionKeyBasedOn(t, clientSet.Kube, lastMigratedKeyMeta, provider.Type, defaultTargetGRs, namespace, labelSelector)
return clientSet
Expand Down Expand Up @@ -276,6 +289,27 @@ func ForceKeyRotation(t testing.TB, updateUnsupportedConfig UpdateUnsupportedCon
})
}

// ClearForcedKeyRotationReason clears encryption.reason under UnsupportedConfigOverrides (same merge path as
// ForceKeyRotation). Call when a test finishes so the next test in sequence does not inherit a non-empty
// reason and the key controller does not keep seeing an external rotation request.
func ClearForcedKeyRotationReason(t testing.TB, updateUnsupportedConfig UpdateUnsupportedConfigFunc) error {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of a new fn why no to change teh ForceKeyRotation to register a cleanup routine via t.Cleanup ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why would I couple those two functionalities together? what if I just want to force a key rotation and not cleanup?

t.Helper()
t.Logf("Clearing forced encryption rotation reason (unsupported config overrides)")
data := map[string]map[string]string{
"encryption": {
"reason": "",
},
}
raw, err := json.Marshal(data)
if err != nil {
return err
}

return onErrorWithTimeout(wait.ForeverTestTimeout, retry.DefaultBackoff, orError(errors.IsConflict, transientAPIError), func() error {
return updateUnsupportedConfig(raw)
})
}

// hasResource returns whether the given group resource is contained in the migrated group resource list.
func hasResource(expectedResource schema.GroupResource, actualResources []schema.GroupResource) bool {
for _, gr := range actualResources {
Expand All @@ -286,6 +320,117 @@ func hasResource(expectedResource schema.GroupResource, actualResources []schema
return false
}

const encryptionMigrationControllerProgressingType = "EncryptionMigrationControllerProgressing"

// allTargetGRsMigrated reports whether every resource in targetGRs appears in meta's migrated list.
func allTargetGRsMigrated(meta EncryptionKeyMeta, targetGRs []schema.GroupResource) bool {
if len(targetGRs) == 0 {
return true
}
for _, gr := range targetGRs {
if !hasResource(gr, meta.Migrated) {
return false
}
}
return true
}

// WaitUntilEncryptionStable waits until the latest write key secret reports the expected mode and all target resources are migrated.
func WaitUntilEncryptionStable(t testing.TB, kube kubernetes.Interface, expectedMode configv1.EncryptionType, targetGRs []schema.GroupResource, namespace, labelSelector string) {
t.Helper()
wantMode := string(expectedMode)
err := wait.Poll(waitPollInterval, waitPollTimeout, func() (bool, error) {
meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
if err != nil {
return false, err
}
if meta.Mode != wantMode {
return false, nil
}
if !allTargetGRsMigrated(meta, targetGRs) {
return false, nil
}
return true, nil
})
require.NoError(t, err)
Comment on lines +339 to +355
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Normalize empty expectedMode to identity before polling.

WaitForEncryptionKeyBasedOn already maps "" to defaultEncryptionMode, but this helper compares meta.Mode to "" directly. Any caller that passes the zero-value encryption type will sit here until timeout, and WaitForNRotations inherits that behavior.

Suggested fix
 func WaitUntilEncryptionStable(t testing.TB, kube kubernetes.Interface, expectedMode configv1.EncryptionType, targetGRs []schema.GroupResource, namespace, labelSelector string) {
 	t.Helper()
 	wantMode := string(expectedMode)
+	if wantMode == "" {
+		wantMode = defaultEncryptionMode
+	}
 	err := wait.Poll(waitPollInterval, waitPollTimeout, func() (bool, error) {
 		meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
 		if err != nil {
 			return false, err
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
func WaitUntilEncryptionStable(t testing.TB, kube kubernetes.Interface, expectedMode configv1.EncryptionType, targetGRs []schema.GroupResource, namespace, labelSelector string) {
t.Helper()
wantMode := string(expectedMode)
err := wait.Poll(waitPollInterval, waitPollTimeout, func() (bool, error) {
meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
if err != nil {
return false, err
}
if meta.Mode != wantMode {
return false, nil
}
if !allTargetGRsMigrated(meta, targetGRs) {
return false, nil
}
return true, nil
})
require.NoError(t, err)
func WaitUntilEncryptionStable(t testing.TB, kube kubernetes.Interface, expectedMode configv1.EncryptionType, targetGRs []schema.GroupResource, namespace, labelSelector string) {
t.Helper()
wantMode := string(expectedMode)
if wantMode == "" {
wantMode = defaultEncryptionMode
}
err := wait.Poll(waitPollInterval, waitPollTimeout, func() (bool, error) {
meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
if err != nil {
return false, err
}
if meta.Mode != wantMode {
return false, nil
}
if !allTargetGRsMigrated(meta, targetGRs) {
return false, nil
}
return true, nil
})
require.NoError(t, err)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/library/encryption/helpers.go` around lines 339 - 355,
WaitUntilEncryptionStable compares meta.Mode to the raw expectedMode which can
be the zero-value; normalize empty expectedMode to the canonical
defaultEncryptionMode before polling so callers passing "" don’t hang. In
WaitUntilEncryptionStable set wantMode := string(expectedMode) and if wantMode
== "" assign wantMode = defaultEncryptionMode (same mapping used by
WaitForEncryptionKeyBasedOn) so the meta.Mode comparison and
allTargetGRsMigrated checks use the normalized mode.

}

// WaitForNRotations waits until encryption is stable (expectedMode on the latest write key and every target
// group resource migrated), then asserts the latest write key secret's numeric suffix equals the baseline
// secret's suffix plus n. If baselineMeta.Name is empty (no prior write key), the baseline suffix is treated
// as 0. Use n to count new write-key revisions you expect after baselineMeta was captured (for example one
// per successful ForceKeyRotation, plus any additional revision from turning encryption on).
func WaitForNRotations(t testing.TB, kube kubernetes.Interface, expectedMode configv1.EncryptionType, targetGRs []schema.GroupResource, namespace, labelSelector string, baselineMeta EncryptionKeyMeta, n uint64) {
t.Helper()
WaitUntilEncryptionStable(t, kube, expectedMode, targetGRs, namespace, labelSelector)

finalMeta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
require.NoError(t, err)
finalID, ok := EncryptionWriteKeySecretID(finalMeta.Name)
require.True(t, ok, "latest encryption key name must carry a numeric suffix: %q", finalMeta.Name)

var baselineID uint64
if len(baselineMeta.Name) == 0 {
baselineID = 0
} else {
var baselineOK bool
baselineID, baselineOK = EncryptionWriteKeySecretID(baselineMeta.Name)
require.True(t, baselineOK, "baseline encryption key name must carry a numeric suffix: %q", baselineMeta.Name)
}

expectedFinalID := baselineID + n
require.Equal(t, expectedFinalID, finalID, "expected write-key id %d (baseline id %d + %d), got final key %q id %d", expectedFinalID, baselineID, n, finalMeta.Name, finalID)
}

// EncryptionWriteKeySecretID returns the numeric suffix of an encryption write-key secret name (the value
// used when sorting keys by revision, e.g. encryption-key-openshift-apiserver-4 yields 4).
func EncryptionWriteKeySecretID(secretName string) (uint64, bool) {
return encryptionKeyNameToKeyID(secretName)
}

// WaitForEncryptionMigrationInProgressWindow waits until storage migration is actively running so another
// encryption change can be stacked. It returns false when the migration for expectedWriteKey completed
// before an in-progress snapshot could be observed (caller may t.Skip).
func WaitForEncryptionMigrationInProgressWindow(t testing.TB, kube kubernetes.Interface, getOp GetOperatorConditionsFuncType, expectedWriteKey string, targetGRs []schema.GroupResource, namespace, labelSelector string) bool {
t.Helper()
const kubePoll = 1 * time.Second
const windowWait = 25 * time.Minute

if getOp != nil {
err := wait.Poll(2*time.Second, windowWait, func() (bool, error) {
conds, err := getOp(t)
if err != nil {
return false, err
}
for _, c := range conds {
if c.Type == encryptionMigrationControllerProgressingType && c.Status == operatorv1.ConditionTrue && c.Reason == "Migrating" {
return true, nil
}
}
return false, nil
})
if err == nil {
return true
}
t.Logf("encryption migration progressing condition not observed within %v, falling back to secret metadata polling: %v", windowWait, err)
}
Comment on lines +399 to +416
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Gate the operator-condition path on expectedWriteKey.

This branch returns true on any Migrating condition, even if expectedWriteKey has not become the active write key yet. Since the metadata path is the only branch that validates the key and it does not run until after the full operator poll window, a stale/previous migration can satisfy this early and the next rotation gets stacked at the wrong time.

Suggested fix
 	if getOp != nil {
 		err := wait.Poll(2*time.Second, windowWait, func() (bool, error) {
+			meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
+			if err != nil {
+				return false, err
+			}
+			if meta.Name != expectedWriteKey {
+				return false, nil
+			}
+			if allTargetGRsMigrated(meta, targetGRs) {
+				return false, nil
+			}
+
 			conds, err := getOp(t)
 			if err != nil {
 				return false, err
 			}
 			for _, c := range conds {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if getOp != nil {
err := wait.Poll(2*time.Second, windowWait, func() (bool, error) {
conds, err := getOp(t)
if err != nil {
return false, err
}
for _, c := range conds {
if c.Type == encryptionMigrationControllerProgressingType && c.Status == operatorv1.ConditionTrue && c.Reason == "Migrating" {
return true, nil
}
}
return false, nil
})
if err == nil {
return true
}
t.Logf("encryption migration progressing condition not observed within %v, falling back to secret metadata polling: %v", windowWait, err)
}
if getOp != nil {
err := wait.Poll(2*time.Second, windowWait, func() (bool, error) {
meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
if err != nil {
return false, err
}
if meta.Name != expectedWriteKey {
return false, nil
}
if allTargetGRsMigrated(meta, targetGRs) {
return false, nil
}
conds, err := getOp(t)
if err != nil {
return false, err
}
for _, c := range conds {
if c.Type == encryptionMigrationControllerProgressingType && c.Status == operatorv1.ConditionTrue && c.Reason == "Migrating" {
return true, nil
}
}
return false, nil
})
if err == nil {
return true
}
t.Logf("encryption migration progressing condition not observed within %v, falling back to secret metadata polling: %v", windowWait, err)
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/library/encryption/helpers.go` around lines 399 - 416, The
operator-condition branch currently returns true for any Migrating condition;
change the loop that scans conds (inside the wait.Poll using getOp) to also
verify the expectedWriteKey is the target of that migration: only treat a
condition as success if c.Type == encryptionMigrationControllerProgressingType
&& c.Status == operatorv1.ConditionTrue && c.Reason == "Migrating" AND
(expectedWriteKey == "" OR the condition text contains expectedWriteKey). In
practice, update the condition check in the loop to inspect the condition's
message/text (e.g., c.Message) for expectedWriteKey and only return true when it
matches; keep the existing fallback to secret metadata polling and the t.Logf
call unchanged.


deadline := time.Now().Add(windowWait)
for time.Now().Before(deadline) {
meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
require.NoError(t, err)
if meta.Name == expectedWriteKey {
if allTargetGRsMigrated(meta, targetGRs) {
return false
}
return true
}
time.Sleep(kubePoll)
}
require.FailNow(t, fmt.Sprintf("timed out after %v waiting for migration in progress on key %q", windowWait, expectedWriteKey))
return false
}

func encryptionKeyNameToKeyID(name string) (uint64, bool) {
lastIdx := strings.LastIndex(name, "-")
idString := name
Expand Down
3 changes: 0 additions & 3 deletions test/library/encryption/perf_scenarios.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,8 @@ import (
"time"

configv1 "github.com/openshift/api/config/v1"
v1 "github.com/openshift/api/operator/v1"
)

type GetOperatorConditionsFuncType func(t testing.TB) ([]v1.OperatorCondition, error)

type PerfScenario struct {
BasicScenario
GetOperatorConditionsFunc GetOperatorConditionsFuncType
Expand Down
92 changes: 92 additions & 0 deletions test/library/encryption/scenarios.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,9 @@ type RotationScenario struct {
GetRawResourceFunc func(t testing.TB, clientSet ClientSet, namespace string) string
UnsupportedConfigFunc UpdateUnsupportedConfigFunc
EncryptionProvider configv1.APIServerEncryption
// GetOperatorConditionsFunc is optional. Overlap tests use it to detect an active migration via
// EncryptionMigrationControllerProgressing before falling back to polling encryption key secrets.
GetOperatorConditionsFunc GetOperatorConditionsFuncType
}

// TestEncryptionRotation first encrypts data with aescbc key
Expand All @@ -244,6 +247,14 @@ func TestEncryptionRotation(t testing.TB, scenario RotationScenario) {

// step 1: create the desired resource
e := NewE(t)
defer func() {
if err := ClearForcedKeyRotationReason(e, scenario.UnsupportedConfigFunc); err != nil {
e.Logf("cleanup: clear encryption rotation reason: %v", err)
if !t.Failed() {
require.NoError(e, err, "test cleanup: clear encryption rotation reason")
}
}
}()
clientSet := GetClients(e)
scenario.CreateResourceFunc(e, GetClients(e), ns)

Expand All @@ -268,3 +279,84 @@ func TestEncryptionRotation(t testing.TB, scenario RotationScenario) {

// TODO: assert conditions - operator and encryption migration controller must report status as active not progressing, and not failing for all scenarios
}

// TestEncryptionRotationDuringFirstMigration ensures storage starts from identity, turns encryption on
// (initial migration), forces a key rotation while that first migration is still running, then asserts
// convergence. Use this to exercise overlap between the first encrypt migration and an external rotation
// reason—not stacked rotations on an already-encrypted cluster.
func TestEncryptionRotationDuringFirstMigration(t testing.TB, scenario RotationScenario) {
ns := scenario.Namespace
labelSelector := scenario.LabelSelector

e := NewE(t)
defer func() {
if err := ClearForcedKeyRotationReason(e, scenario.UnsupportedConfigFunc); err != nil {
e.Logf("cleanup: clear encryption rotation reason: %v", err)
if !t.Failed() {
require.NoError(e, err, "test cleanup: clear encryption rotation reason")
}
}
}()
clientSet := GetClients(e)
scenario.CreateResourceFunc(e, clientSet, ns)

// ApplyAPIServerEncryptionType is a no-op when the APIServer is already on the target type; start from
// identity so the first storage migration always runs.
TestEncryptionTypeIdentity(t, scenario.BasicScenario)

prevMeta, err := GetLastKeyMeta(e, clientSet.Kube, ns, labelSelector)
require.NoError(e, err)
expectedFirstWriteKey, err := determineNextEncryptionKeyName(prevMeta.Name, labelSelector)
require.NoError(e, err)

ApplyAPIServerEncryptionType(e, clientSet, scenario.EncryptionProvider, scenario.TargetGRs)

if !WaitForEncryptionMigrationInProgressWindow(e, clientSet.Kube, scenario.GetOperatorConditionsFunc, expectedFirstWriteKey, scenario.TargetGRs, ns, labelSelector) {
t.Skipf("initial migration finished before an in-progress window was observed; set GetOperatorConditionsFunc or use a cluster where migration stays visible longer")
}

require.NoError(e, ForceKeyRotation(e, scenario.UnsupportedConfigFunc, fmt.Sprintf("test-rotation-during-first-migration-%s", rand.String(4))))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ForceKeyRotation has no effect on KMS. What is out plan here ? Do we want to run the new scenarios for all encryption providers ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ForceKeyRotation will be implemented differently later on

// n=2: one write-key revision from turning encryption on, one from ForceKeyRotation.
WaitForNRotations(e, clientSet.Kube, scenario.EncryptionProvider.Type, scenario.TargetGRs, ns, labelSelector, prevMeta, 2)

scenario.AssertFunc(e, clientSet, scenario.EncryptionProvider.Type, ns, labelSelector)
}

// TestEncryptionRotationDuringOngoingRotation runs with encryption already enabled and stable, then forces
// two key rotations in quick succession so the second happens while migration from the first is still
// in progress. This targets stacked external rotation reasons—not the first encrypt-from-identity path.
func TestEncryptionRotationDuringOngoingRotation(t testing.TB, scenario RotationScenario) {
ns := scenario.Namespace
labelSelector := scenario.LabelSelector

e := NewE(t)
defer func() {
if err := ClearForcedKeyRotationReason(e, scenario.UnsupportedConfigFunc); err != nil {
e.Logf("cleanup: clear encryption rotation reason: %v", err)
if !t.Failed() {
require.NoError(e, err, "test cleanup: clear encryption rotation reason")
}
}
}()
clientSet := GetClients(e)
scenario.CreateResourceFunc(e, clientSet, ns)

TestEncryptionType(t, scenario.BasicScenario, scenario.EncryptionProvider)

metaAfterEncrypt, err := GetLastKeyMeta(e, clientSet.Kube, ns, labelSelector)
require.NoError(e, err)
expectedNextWriteKey, err := determineNextEncryptionKeyName(metaAfterEncrypt.Name, labelSelector)
require.NoError(e, err)

require.NoError(e, ForceKeyRotation(e, scenario.UnsupportedConfigFunc, fmt.Sprintf("test-rotation-overlap-first-%s", rand.String(4))))

if !WaitForEncryptionMigrationInProgressWindow(e, clientSet.Kube, scenario.GetOperatorConditionsFunc, expectedNextWriteKey, scenario.TargetGRs, ns, labelSelector) {
t.Skipf("migration after first forced rotation finished before an in-progress window was observed; set GetOperatorConditionsFunc or use a slower cluster")
}

require.NoError(e, ForceKeyRotation(e, scenario.UnsupportedConfigFunc, fmt.Sprintf("test-rotation-overlap-second-%s", rand.String(4))))
// n=2: two ForceKeyRotation steps after metaAfterEncrypt.
WaitForNRotations(e, clientSet.Kube, scenario.EncryptionProvider.Type, scenario.TargetGRs, ns, labelSelector, metaAfterEncrypt, 2)

scenario.AssertFunc(e, clientSet, scenario.EncryptionProvider.Type, ns, labelSelector)
}