diff --git a/test/library/encryption/helpers.go b/test/library/encryption/helpers.go
index 326dccfd5e..993925d055 100644
--- a/test/library/encryption/helpers.go
+++ b/test/library/encryption/helpers.go
@@ -22,6 +22,7 @@ import (
 	"k8s.io/client-go/util/retry"
 
 	configv1 "github.com/openshift/api/config/v1"
+	operatorv1 "github.com/openshift/api/operator/v1"
 	configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
 	"github.com/openshift/library-go/test/library"
 
@@ -56,13 +57,14 @@ type EncryptionKeyMeta struct {
 
 type UpdateUnsupportedConfigFunc func(raw []byte) error
 
-func SetAndWaitForEncryptionType(t testing.TB, provider configv1.APIServerEncryption, defaultTargetGRs []schema.GroupResource, namespace, labelSelector string) ClientSet {
-	t.Helper()
-	t.Logf("Starting encryption e2e test for %q mode", provider.Type)
+// GetOperatorConditionsFuncType fetches operator conditions (e.g. for EncryptionMigrationControllerProgressing).
+type GetOperatorConditionsFuncType func(t testing.TB) ([]operatorv1.OperatorCondition, error)
 
-	clientSet := GetClients(t)
-	lastMigratedKeyMeta, err := GetLastKeyMeta(t, clientSet.Kube, namespace, labelSelector)
-	require.NoError(t, err)
+// ApplyAPIServerEncryptionType updates the cluster APIServer encryption spec to the desired provider without
+// waiting for storage migration. Callers that need previous key metadata should call GetLastKeyMeta before this.
+func ApplyAPIServerEncryptionType(t testing.TB, clientSet ClientSet, provider configv1.APIServerEncryption, _ []schema.GroupResource) {
+	t.Helper()
+	t.Logf("Applying APIServer encryption type %q (not waiting for migration)", provider.Type)
 
 	apiServer, err := clientSet.ApiServerConfig.Get(context.TODO(), "cluster", metav1.GetOptions{})
 	require.NoError(t, err)
@@ -75,6 +77,17 @@ func SetAndWaitForEncryptionTyp
 	} else {
 		t.Logf("APIServer is already configured to use %q mode", provider.Type)
 	}
+}
+
+func SetAndWaitForEncryptionType(t testing.TB, provider configv1.APIServerEncryption, defaultTargetGRs []schema.GroupResource, namespace, labelSelector string) ClientSet {
+	t.Helper()
+	t.Logf("Starting encryption e2e test for %q mode", provider.Type)
+
+	clientSet := GetClients(t)
+	lastMigratedKeyMeta, err := GetLastKeyMeta(t, clientSet.Kube, namespace, labelSelector)
+	require.NoError(t, err)
+
+	ApplyAPIServerEncryptionType(t, clientSet, provider, defaultTargetGRs)
 
 	WaitForEncryptionKeyBasedOn(t, clientSet.Kube, lastMigratedKeyMeta, provider.Type, defaultTargetGRs, namespace, labelSelector)
 	return clientSet
@@ -276,6 +289,27 @@ func ForceKeyRotation(t testing.TB, updateUnsupportedConfig UpdateUnsupportedCon
 	})
 }
 
+// ClearForcedKeyRotationReason clears encryption.reason under UnsupportedConfigOverrides (same merge path as
+// ForceKeyRotation). Call when a test finishes so the next test in sequence does not inherit a non-empty
+// reason and the key controller does not keep seeing an external rotation request.
+func ClearForcedKeyRotationReason(t testing.TB, updateUnsupportedConfig UpdateUnsupportedConfigFunc) error {
+	t.Helper()
+	t.Logf("Clearing forced encryption rotation reason (unsupported config overrides)")
+	data := map[string]map[string]string{
+		"encryption": {
+			"reason": "",
+		},
+	}
+	raw, err := json.Marshal(data)
+	if err != nil {
+		return err
+	}
+
+	return onErrorWithTimeout(wait.ForeverTestTimeout, retry.DefaultBackoff, orError(errors.IsConflict, transientAPIError), func() error {
+		return updateUnsupportedConfig(raw)
+	})
+}
+
 // hasResource returns whether the given group resource is contained in the migrated group resource list.
 func hasResource(expectedResource schema.GroupResource, actualResources []schema.GroupResource) bool {
 	for _, gr := range actualResources {
@@ -286,6 +320,117 @@ func hasResource(expectedResource schema.GroupResource, actualResources []schema
 	return false
 }
 
+const encryptionMigrationControllerProgressingType = "EncryptionMigrationControllerProgressing"
+
+// allTargetGRsMigrated reports whether every resource in targetGRs appears in meta's migrated list.
+func allTargetGRsMigrated(meta EncryptionKeyMeta, targetGRs []schema.GroupResource) bool {
+	if len(targetGRs) == 0 {
+		return true
+	}
+	for _, gr := range targetGRs {
+		if !hasResource(gr, meta.Migrated) {
+			return false
+		}
+	}
+	return true
+}
+
+// WaitUntilEncryptionStable waits until the latest write key secret reports the expected mode and all target resources are migrated.
+func WaitUntilEncryptionStable(t testing.TB, kube kubernetes.Interface, expectedMode configv1.EncryptionType, targetGRs []schema.GroupResource, namespace, labelSelector string) {
+	t.Helper()
+	wantMode := string(expectedMode)
+	err := wait.Poll(waitPollInterval, waitPollTimeout, func() (bool, error) {
+		meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
+		if err != nil {
+			return false, err
+		}
+		if meta.Mode != wantMode {
+			return false, nil
+		}
+		if !allTargetGRsMigrated(meta, targetGRs) {
+			return false, nil
+		}
+		return true, nil
+	})
+	require.NoError(t, err)
+}
+
+// WaitForNRotations waits until encryption is stable (expectedMode on the latest write key and every target
+// group resource migrated), then asserts the latest write key secret's numeric suffix equals the baseline
+// secret's suffix plus n. If baselineMeta.Name is empty (no prior write key), the baseline suffix is treated
+// as 0. Use n to count new write-key revisions you expect after baselineMeta was captured (for example one
+// per successful ForceKeyRotation, plus any additional revision from turning encryption on).
+func WaitForNRotations(t testing.TB, kube kubernetes.Interface, expectedMode configv1.EncryptionType, targetGRs []schema.GroupResource, namespace, labelSelector string, baselineMeta EncryptionKeyMeta, n uint64) {
+	t.Helper()
+	WaitUntilEncryptionStable(t, kube, expectedMode, targetGRs, namespace, labelSelector)
+
+	finalMeta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
+	require.NoError(t, err)
+	finalID, ok := EncryptionWriteKeySecretID(finalMeta.Name)
+	require.True(t, ok, "latest encryption key name must carry a numeric suffix: %q", finalMeta.Name)
+
+	var baselineID uint64
+	if len(baselineMeta.Name) == 0 {
+		baselineID = 0
+	} else {
+		var baselineOK bool
+		baselineID, baselineOK = EncryptionWriteKeySecretID(baselineMeta.Name)
+		require.True(t, baselineOK, "baseline encryption key name must carry a numeric suffix: %q", baselineMeta.Name)
+	}
+
+	expectedFinalID := baselineID + n
+	require.Equal(t, expectedFinalID, finalID, "expected write-key id %d (baseline id %d + %d), got final key %q id %d", expectedFinalID, baselineID, n, finalMeta.Name, finalID)
+}
+
+// EncryptionWriteKeySecretID returns the numeric suffix of an encryption write-key secret name (the value
+// used when sorting keys by revision, e.g. encryption-key-openshift-apiserver-4 yields 4).
+func EncryptionWriteKeySecretID(secretName string) (uint64, bool) {
+	return encryptionKeyNameToKeyID(secretName)
+}
+
+// WaitForEncryptionMigrationInProgressWindow waits until storage migration is actively running so another
+// encryption change can be stacked. It returns false when the migration for expectedWriteKey completed
+// before an in-progress snapshot could be observed (caller may t.Skip).
+func WaitForEncryptionMigrationInProgressWindow(t testing.TB, kube kubernetes.Interface, getOp GetOperatorConditionsFuncType, expectedWriteKey string, targetGRs []schema.GroupResource, namespace, labelSelector string) bool {
+	t.Helper()
+	const kubePoll = 1 * time.Second
+	const windowWait = 25 * time.Minute
+
+	if getOp != nil {
+		err := wait.Poll(2*time.Second, windowWait, func() (bool, error) {
+			conds, err := getOp(t)
+			if err != nil {
+				return false, err
+			}
+			for _, c := range conds {
+				if c.Type == encryptionMigrationControllerProgressingType && c.Status == operatorv1.ConditionTrue && c.Reason == "Migrating" {
+					return true, nil
+				}
+			}
+			return false, nil
+		})
+		if err == nil {
+			return true
+		}
+		t.Logf("encryption migration progressing condition not observed within %v, falling back to secret metadata polling: %v", windowWait, err)
+	}
+
+	deadline := time.Now().Add(windowWait)
+	for time.Now().Before(deadline) {
+		meta, err := GetLastKeyMeta(t, kube, namespace, labelSelector)
+		require.NoError(t, err)
+		if meta.Name == expectedWriteKey {
+			if allTargetGRsMigrated(meta, targetGRs) {
+				return false
+			}
+			return true
+		}
+		time.Sleep(kubePoll)
+	}
+	require.FailNow(t, fmt.Sprintf("timed out after %v waiting for migration in progress on key %q", windowWait, expectedWriteKey))
+	return false
+}
+
 func encryptionKeyNameToKeyID(name string) (uint64, bool) {
 	lastIdx := strings.LastIndex(name, "-")
 	idString := name
diff --git a/test/library/encryption/perf_scenarios.go b/test/library/encryption/perf_scenarios.go
index 0095f20704..b413a30e07 100644
--- a/test/library/encryption/perf_scenarios.go
+++ b/test/library/encryption/perf_scenarios.go
@@ -5,11 +5,8 @@ import (
 	"time"
 
 	configv1 "github.com/openshift/api/config/v1"
-	v1 "github.com/openshift/api/operator/v1"
 )
 
-type GetOperatorConditionsFuncType func(t testing.TB) ([]v1.OperatorCondition, error)
-
 type PerfScenario struct {
 	BasicScenario
 	GetOperatorConditionsFunc GetOperatorConditionsFuncType
diff --git a/test/library/encryption/scenarios.go b/test/library/encryption/scenarios.go
index 7b2a154e62..955eea8b1d 100644
--- a/test/library/encryption/scenarios.go
+++ b/test/library/encryption/scenarios.go
@@ -233,6 +233,9 @@ type RotationScenario struct {
 	GetRawResourceFunc    func(t testing.TB, clientSet ClientSet, namespace string) string
 	UnsupportedConfigFunc UpdateUnsupportedConfigFunc
 	EncryptionProvider    configv1.APIServerEncryption
+	// GetOperatorConditionsFunc is optional. Overlap tests use it to detect an active migration via
+	// EncryptionMigrationControllerProgressing before falling back to polling encryption key secrets.
+	GetOperatorConditionsFunc GetOperatorConditionsFuncType
 }
 
 // TestEncryptionRotation first encrypts data with aescbc key
@@ -244,6 +247,14 @@ func TestEncryptionRotation(t testing.TB, scenario RotationScenario) {
 
 	// step 1: create the desired resource
 	e := NewE(t)
+	defer func() {
+		if err := ClearForcedKeyRotationReason(e, scenario.UnsupportedConfigFunc); err != nil {
+			e.Logf("cleanup: clear encryption rotation reason: %v", err)
+			if !t.Failed() {
+				require.NoError(e, err, "test cleanup: clear encryption rotation reason")
+			}
+		}
+	}()
 	clientSet := GetClients(e)
 	scenario.CreateResourceFunc(e, GetClients(e), ns)
 
@@ -268,3 +279,84 @@ func TestEncryptionRotation(t testing.TB, scenario RotationScenario) {
 
 	// TODO: assert conditions - operator and encryption migration controller must report status as active not progressing, and not failing for all scenarios
 }
+
+// TestEncryptionRotationDuringFirstMigration ensures storage starts from identity, turns encryption on
+// (initial migration), forces a key rotation while that first migration is still running, then asserts
+// convergence. Use this to exercise overlap between the first encrypt migration and an external rotation
+// reason—not stacked rotations on an already-encrypted cluster.
+func TestEncryptionRotationDuringFirstMigration(t testing.TB, scenario RotationScenario) {
+	ns := scenario.Namespace
+	labelSelector := scenario.LabelSelector
+
+	e := NewE(t)
+	defer func() {
+		if err := ClearForcedKeyRotationReason(e, scenario.UnsupportedConfigFunc); err != nil {
+			e.Logf("cleanup: clear encryption rotation reason: %v", err)
+			if !t.Failed() {
+				require.NoError(e, err, "test cleanup: clear encryption rotation reason")
+			}
+		}
+	}()
+	clientSet := GetClients(e)
+	scenario.CreateResourceFunc(e, clientSet, ns)
+
+	// ApplyAPIServerEncryptionType is a no-op when the APIServer is already on the target type; start from
+	// identity so the first storage migration always runs.
+	TestEncryptionTypeIdentity(t, scenario.BasicScenario)
+
+	prevMeta, err := GetLastKeyMeta(e, clientSet.Kube, ns, labelSelector)
+	require.NoError(e, err)
+	expectedFirstWriteKey, err := determineNextEncryptionKeyName(prevMeta.Name, labelSelector)
+	require.NoError(e, err)
+
+	ApplyAPIServerEncryptionType(e, clientSet, scenario.EncryptionProvider, scenario.TargetGRs)
+
+	if !WaitForEncryptionMigrationInProgressWindow(e, clientSet.Kube, scenario.GetOperatorConditionsFunc, expectedFirstWriteKey, scenario.TargetGRs, ns, labelSelector) {
+		t.Skipf("initial migration finished before an in-progress window was observed; set GetOperatorConditionsFunc or use a cluster where migration stays visible longer")
+	}
+
+	require.NoError(e, ForceKeyRotation(e, scenario.UnsupportedConfigFunc, fmt.Sprintf("test-rotation-during-first-migration-%s", rand.String(4))))
+	// n=2: one write-key revision from turning encryption on, one from ForceKeyRotation.
+	WaitForNRotations(e, clientSet.Kube, scenario.EncryptionProvider.Type, scenario.TargetGRs, ns, labelSelector, prevMeta, 2)
+
+	scenario.AssertFunc(e, clientSet, scenario.EncryptionProvider.Type, ns, labelSelector)
+}
+
+// TestEncryptionRotationDuringOngoingRotation runs with encryption already enabled and stable, then forces
+// two key rotations in quick succession so the second happens while migration from the first is still
+// in progress. This targets stacked external rotation reasons—not the first encrypt-from-identity path.
+func TestEncryptionRotationDuringOngoingRotation(t testing.TB, scenario RotationScenario) {
+	ns := scenario.Namespace
+	labelSelector := scenario.LabelSelector
+
+	e := NewE(t)
+	defer func() {
+		if err := ClearForcedKeyRotationReason(e, scenario.UnsupportedConfigFunc); err != nil {
+			e.Logf("cleanup: clear encryption rotation reason: %v", err)
+			if !t.Failed() {
+				require.NoError(e, err, "test cleanup: clear encryption rotation reason")
+			}
+		}
+	}()
+	clientSet := GetClients(e)
+	scenario.CreateResourceFunc(e, clientSet, ns)
+
+	TestEncryptionType(t, scenario.BasicScenario, scenario.EncryptionProvider)
+
+	metaAfterEncrypt, err := GetLastKeyMeta(e, clientSet.Kube, ns, labelSelector)
+	require.NoError(e, err)
+	expectedNextWriteKey, err := determineNextEncryptionKeyName(metaAfterEncrypt.Name, labelSelector)
+	require.NoError(e, err)
+
+	require.NoError(e, ForceKeyRotation(e, scenario.UnsupportedConfigFunc, fmt.Sprintf("test-rotation-overlap-first-%s", rand.String(4))))
+
+	if !WaitForEncryptionMigrationInProgressWindow(e, clientSet.Kube, scenario.GetOperatorConditionsFunc, expectedNextWriteKey, scenario.TargetGRs, ns, labelSelector) {
+		t.Skipf("migration after first forced rotation finished before an in-progress window was observed; set GetOperatorConditionsFunc or use a slower cluster")
+	}
+
+	require.NoError(e, ForceKeyRotation(e, scenario.UnsupportedConfigFunc, fmt.Sprintf("test-rotation-overlap-second-%s", rand.String(4))))
+	// n=2: two ForceKeyRotation steps after metaAfterEncrypt.
+	WaitForNRotations(e, clientSet.Kube, scenario.EncryptionProvider.Type, scenario.TargetGRs, ns, labelSelector, metaAfterEncrypt, 2)
+
+	scenario.AssertFunc(e, clientSet, scenario.EncryptionProvider.Type, ns, labelSelector)
+}