From ad7aaecc87907dc66f9083c635ec27e23eca43ca Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Wed, 8 Apr 2026 10:02:21 +0200 Subject: [PATCH 01/10] sync logic rewrite + tests and constants --- api/v4/postgrescluster_types.go | 3 +- .../postgrescluster_controller_test.go | 2 +- .../postgresdatabase_controller_test.go | 225 +++++-- pkg/postgresql/cluster/core/cluster.go | 556 +++++++++++++++--- .../cluster/core/cluster_unit_test.go | 135 +++++ pkg/postgresql/cluster/core/types.go | 50 +- .../cluster/core/types/constants/state.go | 41 ++ pkg/postgresql/database/core/database.go | 133 +++-- .../database/core/database_unit_test.go | 163 ++++- pkg/postgresql/database/core/types.go | 3 +- 10 files changed, 1094 insertions(+), 217 deletions(-) create mode 100644 pkg/postgresql/cluster/core/types/constants/state.go diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 3e3dd0da7..5adc91f13 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -36,8 +36,7 @@ type ManagedRole struct { // Exists controls whether the role should be present (true) or absent (false) in PostgreSQL. // +kubebuilder:default=true - // +optional - Exists bool `json:"exists,omitempty"` + Exists bool `json:"exists"` } // PostgresClusterSpec defines the desired state of PostgresCluster. diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index ea4d66f64..5687ae1f8 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -51,7 +51,7 @@ import ( * PC-09 ignores no-op updates */ -var _ = Describe("PostgresCluster Controller", func() { +var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { const ( postgresVersion = "15.10" diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go index a1f5ed9ba..31f591573 100644 --- a/internal/controller/postgresdatabase_controller_test.go +++ b/internal/controller/postgresdatabase_controller_test.go @@ -40,6 +40,44 @@ import ( const postgresDatabaseFinalizer = "postgresdatabases.enterprise.splunk.com/finalizer" +// condition types +const ( + condClusterReady = "ClusterReady" + condSecretsReady = "SecretsReady" + condConfigMapsReady = "ConfigMapsReady" + condRolesReady = "RolesReady" + condDatabasesReady = "DatabasesReady" + condPrivilegesReady = "PrivilegesReady" +) + +// condition reasons +const ( + reasonClusterNotFound = "ClusterNotFound" + reasonClusterAvailable = "ClusterAvailable" + reasonSecretsCreated = "SecretsCreated" + reasonConfigMapsCreated = "ConfigMapsCreated" + reasonUsersAvailable = "UsersAvailable" + reasonDatabasesAvailable = "DatabasesAvailable" + reasonRoleConflict = "RoleConflict" +) + +// phases +const ( + phasePending = "Pending" + phaseReady = "Ready" + phaseFailed = "Failed" +) + +// annotations +const retainedFromAnnotation = "enterprise.splunk.com/retained-from" + +// database names used across tests +const ( + dbAppdb = "appdb" + dbKeepdb = "payments" + dbDropdb = "analytics" +) + func reconcilePostgresDatabase(ctx context.Context, nn types.NamespacedName) (ctrl.Result, error) { reconciler := &PostgresDatabaseReconciler{ Client: k8sClient, @@ -57,12 +95,20 @@ func managedRoleNames(roles []enterprisev4.ManagedRole) []string { return names } -func adminRoleNameForTest(dbName string) string { - return dbName + "_admin" -} +func adminRoleNameForTest(dbName string) string { 
return dbName + "_admin" } +func rwRoleNameForTest(dbName string) string { return dbName + "_rw" } -func rwRoleNameForTest(dbName string) string { - return dbName + "_rw" +func adminSecretNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-admin", resourceName, dbName) +} +func rwSecretNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-rw", resourceName, dbName) +} +func configMapNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-config", resourceName, dbName) +} +func cnpgDatabaseNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s", resourceName, dbName) } func ownedByPostgresDatabase(postgresDB *enterprisev4.PostgresDatabase) []metav1.OwnerReference { @@ -208,17 +254,17 @@ func seedExistingDatabaseStatus(ctx context.Context, current *enterprisev4.Postg func expectProvisionedArtifacts(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) { adminSecret := &corev1.Secret{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adminSecret)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: adminSecretNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adminSecret)).To(Succeed()) Expect(adminSecret.Data).To(HaveKey("password")) Expect(metav1.IsControlledBy(adminSecret, owner)).To(BeTrue()) rwSecret := &corev1.Secret{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-rw", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, rwSecret)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: rwSecretNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, rwSecret)).To(Succeed()) Expect(rwSecret.Data).To(HaveKey("password")) Expect(metav1.IsControlledBy(rwSecret, owner)).To(BeTrue()) configMap := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("ro-host", "tenant-ro."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("admin-user", adminRoleNameForTest(scenario.dbName))) @@ -234,7 +280,7 @@ func expectManagedRolesPatched(ctx context.Context, scenario readyClusterScenari func expectCNPGDatabaseCreated(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) *cnpgv1.Database { cnpgDatabase := &cnpgv1.Database{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) Expect(cnpgDatabase.Spec.Name).To(Equal(scenario.dbName)) Expect(cnpgDatabase.Spec.Owner).To(Equal(adminRoleNameForTest(scenario.dbName))) Expect(cnpgDatabase.Spec.ClusterRef.Name).To(Equal(scenario.cnpgClusterName)) @@ -250,18 
+296,18 @@ func markCNPGDatabaseApplied(ctx context.Context, cnpgDatabase *cnpgv1.Database) func expectPoolerConfigMap(ctx context.Context, scenario readyClusterScenario) { configMap := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) Expect(configMap.Data).To(HaveKeyWithValue("pooler-rw-host", scenario.cnpgClusterName+"-pooler-rw."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("pooler-ro-host", scenario.cnpgClusterName+"-pooler-ro."+scenario.namespace+".svc.cluster.local")) } func seedMissingClusterScenario(ctx context.Context, namespace, resourceName string, finalizers ...string) types.NamespacedName { - createPostgresDatabaseResource(ctx, namespace, resourceName, "absent-cluster", []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, finalizers...) + createPostgresDatabaseResource(ctx, namespace, resourceName, "absent-cluster", []enterprisev4.DatabaseDefinition{{Name: dbAppdb}}, finalizers...) return types.NamespacedName{Name: resourceName, Namespace: namespace} } func seedConflictScenario(ctx context.Context, namespace, resourceName, clusterName string) types.NamespacedName { - createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, postgresDatabaseFinalizer) + createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{{Name: dbAppdb}}, postgresDatabaseFinalizer) postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) markPostgresClusterReady(ctx, postgresCluster, "unused-cnpg", namespace, false) return types.NamespacedName{Name: resourceName, Namespace: namespace} @@ -272,7 +318,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl for _, dbName := range dbNames { Expect(k8sClient.Create(ctx, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-admin", resourceName, dbName), + Name: adminSecretNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -280,7 +326,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-rw", resourceName, dbName), + Name: rwSecretNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -288,7 +334,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-config", resourceName, dbName), + Name: configMapNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -296,7 +342,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", resourceName, dbName), + Name: cnpgDatabaseNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -309,9 +355,18 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl } } +func 
expectManagedRoleExists(cluster *enterprisev4.PostgresCluster, roleName string, exists bool) { + rolesByName := make(map[string]enterprisev4.ManagedRole, len(cluster.Spec.ManagedRoles)) + for _, r := range cluster.Spec.ManagedRoles { + rolesByName[r.Name] = r + } + Expect(rolesByName).To(HaveKey(roleName)) + Expect(rolesByName[roleName].Exists).To(Equal(exists), "role %s: expected Exists=%v", roleName, exists) +} + func expectRetainedArtifact(ctx context.Context, name, namespace, resourceName string, obj client.Object) { Expect(k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, obj)).To(Succeed()) - Expect(obj.GetAnnotations()).To(HaveKeyWithValue("enterprise.splunk.com/retained-from", resourceName)) + Expect(obj.GetAnnotations()).To(HaveKeyWithValue(retainedFromAnnotation, resourceName)) Expect(obj.GetOwnerReferences()).To(BeEmpty()) } @@ -333,7 +388,7 @@ func expectStatusCondition(current *enterprisev4.PostgresDatabase, conditionType } func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, expectedDatabase enterprisev4.DatabaseInfo) { - expectStatusPhase(current, "Ready") + expectStatusPhase(current, phaseReady) Expect(current.Status.ObservedGeneration).NotTo(BeNil()) Expect(*current.Status.ObservedGeneration).To(Equal(generation)) Expect(current.Status.Databases).To(HaveLen(1)) @@ -344,7 +399,7 @@ func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, Expect(current.Status.Databases[0].ConfigMapRef).NotTo(BeNil()) } -var _ = Describe("PostgresDatabase Controller", func() { +var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { var ( ctx context.Context namespace string @@ -384,9 +439,9 @@ var _ = Describe("PostgresDatabase Controller", func() { expectReconcileResult(result, err, 30*time.Second) current := fetchPostgresDatabase(ctx, requestName) - expectStatusPhase(current, "Pending") - expectStatusCondition(current, "ClusterReady", metav1.ConditionFalse, "ClusterNotFound") - clusterReady := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") + expectStatusPhase(current, phasePending) + expectStatusCondition(current, condClusterReady, metav1.ConditionFalse, reasonClusterNotFound) + clusterReady := meta.FindStatusCondition(current.Status.Conditions, condClusterReady) Expect(clusterReady.ObservedGeneration).To(Equal(current.Generation)) }) }) @@ -395,7 +450,7 @@ var _ = Describe("PostgresDatabase Controller", func() { When("the referenced PostgresCluster is ready", func() { Context("and live grants are not invoked", func() { It("reconciles secrets, configmaps, roles, and CNPG databases", func() { - scenario := newReadyClusterScenario(namespace, "ready-cluster", "tenant-cluster", "tenant-cnpg", "appdb") + scenario := newReadyClusterScenario(namespace, "ready-cluster", "tenant-cluster", "tenant-cnpg", dbAppdb) seedReadyClusterScenario(ctx, scenario, false) result, err := reconcilePostgresDatabase(ctx, scenario.requestName) @@ -419,18 +474,18 @@ var _ = Describe("PostgresDatabase Controller", func() { current = fetchPostgresDatabase(ctx, scenario.requestName) expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) - expectStatusCondition(current, "ClusterReady", metav1.ConditionTrue, "ClusterAvailable") - expectStatusCondition(current, "SecretsReady", metav1.ConditionTrue, "SecretsCreated") - expectStatusCondition(current, "ConfigMapsReady", metav1.ConditionTrue, "ConfigMapsCreated") - expectStatusCondition(current, 
"RolesReady", metav1.ConditionTrue, "UsersAvailable") - expectStatusCondition(current, "DatabasesReady", metav1.ConditionTrue, "DatabasesAvailable") - Expect(meta.FindStatusCondition(current.Status.Conditions, "PrivilegesReady")).To(BeNil()) + expectStatusCondition(current, condClusterReady, metav1.ConditionTrue, reasonClusterAvailable) + expectStatusCondition(current, condSecretsReady, metav1.ConditionTrue, reasonSecretsCreated) + expectStatusCondition(current, condConfigMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated) + expectStatusCondition(current, condRolesReady, metav1.ConditionTrue, reasonUsersAvailable) + expectStatusCondition(current, condDatabasesReady, metav1.ConditionTrue, reasonDatabasesAvailable) + Expect(meta.FindStatusCondition(current.Status.Conditions, condPrivilegesReady)).To(BeNil()) }) }) Context("and connection pooling is enabled", func() { It("adds pooler endpoints to the generated ConfigMap", func() { - scenario := newReadyClusterScenario(namespace, "pooler-cluster", "pooler-postgres", "pooler-cnpg", "appdb") + scenario := newReadyClusterScenario(namespace, "pooler-cluster", "pooler-postgres", "pooler-cnpg", dbAppdb) seedReadyClusterScenario(ctx, scenario, true) result, err := reconcilePostgresDatabase(ctx, scenario.requestName) @@ -462,8 +517,8 @@ var _ = Describe("PostgresDatabase Controller", func() { }, "spec": map[string]any{ "managedRoles": []map[string]any{ - {"name": "appdb_admin", "exists": true}, - {"name": "appdb_rw", "exists": true}, + {"name": adminRoleNameForTest(dbAppdb), "exists": true}, + {"name": rwRoleNameForTest(dbAppdb), "exists": true}, }, }, }, @@ -476,23 +531,79 @@ var _ = Describe("PostgresDatabase Controller", func() { Expect(result).To(Equal(ctrl.Result{})) current := fetchPostgresDatabase(ctx, requestName) - expectStatusPhase(current, "Failed") - expectStatusCondition(current, "RolesReady", metav1.ConditionFalse, "RoleConflict") + expectStatusPhase(current, phaseFailed) + expectStatusCondition(current, condRolesReady, metav1.ConditionFalse, reasonRoleConflict) - rolesReady := meta.FindStatusCondition(current.Status.Conditions, "RolesReady") - Expect(rolesReady.Message).To(ContainSubstring("appdb_admin")) + rolesReady := meta.FindStatusCondition(current.Status.Conditions, condRolesReady) + Expect(rolesReady.Message).To(ContainSubstring(adminRoleNameForTest(dbAppdb))) Expect(rolesReady.Message).To(ContainSubstring("postgresdatabase-legacy")) configMap := &corev1.ConfigMap{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb-config", Namespace: namespace}, configMap) + err = k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest("conflict-cluster", dbAppdb), Namespace: namespace}, configMap) Expect(apierrors.IsNotFound(err)).To(BeTrue()) cnpgDatabase := &cnpgv1.Database{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb", Namespace: namespace}, cnpgDatabase) + err = k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseNameForTest("conflict-cluster", dbAppdb), Namespace: namespace}, cnpgDatabase) Expect(apierrors.IsNotFound(err)).To(BeTrue()) }) }) + When("a database is removed from spec.databases while the CR stays alive", func() { + It("marks the removed database roles as absent in postgres cluster and keeps the retained roles present", func() { + resourceName := "live-db-removal" + clusterName := "live-db-removal-postgres" + cnpgClusterName := "live-db-removal-cnpg" + requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} + + postgresDB 
:= createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ + {Name: dbKeepdb}, + {Name: dbDropdb}, + }, postgresDatabaseFinalizer) + Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) + + postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) + markPostgresClusterReady(ctx, postgresCluster, cnpgClusterName, namespace, false) + cnpgCluster := createCNPGClusterResource(ctx, namespace, cnpgClusterName) + markCNPGClusterReady(ctx, cnpgCluster, []string{ + adminRoleNameForTest(dbKeepdb), rwRoleNameForTest(dbKeepdb), + adminRoleNameForTest(dbDropdb), rwRoleNameForTest(dbDropdb), + }, "tenant-rw", "tenant-ro") + + initialRolesPatch := &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": enterprisev4.GroupVersion.String(), + "kind": "PostgresCluster", + "metadata": map[string]any{"name": clusterName, "namespace": namespace}, + "spec": map[string]any{ + "managedRoles": []map[string]any{ + {"name": adminRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-rw", "key": "password"}}, + }, + }, + }, + } + Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-"+resourceName))).To(Succeed()) + + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, dbKeepdb, dbDropdb) + + postgresDB.Spec.Databases = []enterprisev4.DatabaseDefinition{{Name: dbKeepdb}} + Expect(k8sClient.Update(ctx, postgresDB)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, requestName) + expectReconcileResult(result, err, 15*time.Second) + + updatedCluster := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) + + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbDropdb), false) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbDropdb), false) + }) + }) + When("the PostgresDatabase is being deleted", func() { Context("with retained and deleted databases", func() { It("orphans retained resources, removes deleted resources, and patches managed roles", func() { @@ -501,8 +612,8 @@ var _ = Describe("PostgresDatabase Controller", func() { requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} postgresDB := createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ - {Name: "keepdb", DeletionPolicy: "Retain"}, - {Name: "dropdb"}, + {Name: dbKeepdb, DeletionPolicy: "Retain"}, + {Name: dbDropdb}, }, postgresDatabaseFinalizer) Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) @@ -518,36 +629,40 @@ var _ = Describe("PostgresDatabase Controller", func() { }, "spec": map[string]any{ "managedRoles": []map[string]any{ - {"name": "keepdb_admin", 
"exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-admin", "key": "password"}}, - {"name": "keepdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-rw", "key": "password"}}, - {"name": "dropdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-admin", "key": "password"}}, - {"name": "dropdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-rw", "key": "password"}}, }, }, }, } - Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-delete-cluster"))).To(Succeed()) + Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-"+resourceName))).To(Succeed()) - seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, "keepdb", "dropdb") + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, dbKeepdb, dbDropdb) Expect(k8sClient.Delete(ctx, postgresDB)).To(Succeed()) result, err := reconcilePostgresDatabase(ctx, requestName) expectEmptyReconcileResult(result, err) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-config", namespace, resourceName, &corev1.ConfigMap{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-admin", namespace, resourceName, &corev1.Secret{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-rw", namespace, resourceName, &corev1.Secret{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb", namespace, resourceName, &cnpgv1.Database{}) + expectRetainedArtifact(ctx, configMapNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.ConfigMap{}) + expectRetainedArtifact(ctx, adminSecretNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, rwSecretNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, cnpgDatabaseNameForTest(resourceName, dbKeepdb), namespace, resourceName, &cnpgv1.Database{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-config", namespace, &corev1.ConfigMap{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-admin", namespace, &corev1.Secret{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-rw", namespace, &corev1.Secret{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb", namespace, &cnpgv1.Database{}) + expectDeletedArtifact(ctx, configMapNameForTest(resourceName, dbDropdb), namespace, &corev1.ConfigMap{}) + expectDeletedArtifact(ctx, adminSecretNameForTest(resourceName, dbDropdb), namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, rwSecretNameForTest(resourceName, dbDropdb), namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, cnpgDatabaseNameForTest(resourceName, dbDropdb), namespace, &cnpgv1.Database{}) updatedCluster := &enterprisev4.PostgresCluster{} 
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) - Expect(managedRoleNames(updatedCluster.Spec.ManagedRoles)).To(ConsistOf("keepdb_admin", "keepdb_rw")) + + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbDropdb), false) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbDropdb), false) current := &enterprisev4.PostgresDatabase{} err = k8sClient.Get(ctx, requestName, current) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 3334011c6..81a928595 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -24,6 +24,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -285,6 +286,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. // Reconcile Connection Pooler. poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) if err != nil { @@ -445,45 +447,475 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } } - // Final status sync. - var oldPhase string - if postgresCluster.Status.Phase != nil { - oldPhase = *postgresCluster.Status.Phase + // Aggregate component readiness from iterative health checks. 
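+	// Each check contributes one State bit; the cluster is reported Ready only when
+	// every flag in pgcConstants.ComponentsReady has been collected. A failed or
+	// pending component returns early with its own condition, phase, and requeue hint.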
+	state := pgcConstants.EmptyState
+	conditions := []clusterReadynessCheck{
+		newProvisionerHealthCheck(postgresCluster, cnpgCluster),
+		newPoolerHealthCheck(c, postgresCluster, poolerEnabled, poolerConfigPresent),
+		newConfigMapHealthCheck(c, postgresCluster),
+		newSecretHealthCheck(c, postgresCluster),
 	}
-	if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil {
-		logger.Error(err, "Failed to sync status")
-		if apierrors.IsConflict(err) {
-			logger.Info("Conflict during status update, will requeue")
-			return ctrl.Result{Requeue: true}, nil
+
+	for _, check := range conditions {
+		componentHealth, err := check.Condition(ctx)
+		if err != nil {
+			if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil {
+				if apierrors.IsConflict(statusErr) {
+					return ctrl.Result{Requeue: true}, nil
+				}
+				return ctrl.Result{}, statusErr
+			}
+			logger.Error(err, "Component health check reported issues",
+				"component", check,
+				"requeueAfter", componentHealth.Result.RequeueAfter,
+				"reason", componentHealth.Reason)
+			return componentHealth.Result, err
+		}
+
+		if isPendingState(componentHealth.State) {
+			if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil {
+				if apierrors.IsConflict(statusErr) {
+					return ctrl.Result{Requeue: true}, nil
+				}
+				return ctrl.Result{}, statusErr
+			}
+			return componentHealth.Result, nil
 		}
-		return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err)
+		state |= componentHealth.State
 	}
-	var newPhase string
-	if postgresCluster.Status.Phase != nil {
-		newPhase = *postgresCluster.Status.Phase
+
+	if state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady {
+		logger.Info("Reconciliation complete")
+		if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil {
+			if apierrors.IsConflict(err) {
+				return ctrl.Result{Requeue: true}, nil
+			}
+			return ctrl.Result{}, err
+		}
+		return ctrl.Result{}, nil
 	}
-	rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase)
-	if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy {
-		rwPooler := &cnpgv1.Pooler{}
-		rwErr := c.Get(ctx, types.NamespacedName{
-			Name:      poolerResourceName(postgresCluster.Name, readWriteEndpoint),
-			Namespace: postgresCluster.Namespace,
-		}, rwPooler)
-		roPooler := &cnpgv1.Pooler{}
-		roErr := c.Get(ctx, types.NamespacedName{
-			Name:      poolerResourceName(postgresCluster.Name, readOnlyEndpoint),
-			Namespace: postgresCluster.Namespace,
-		}, roPooler)
-		if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) {
-			logger.Info("Poolers ready, syncing status")
-			poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions))
-			copy(poolerOldConditions, postgresCluster.Status.Conditions)
-			_ = syncPoolerStatus(ctx, c, postgresCluster)
-			rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions)
+	return ctrl.Result{RequeueAfter: retryDelay}, nil
+}
+
+// StateInformationDto captures the outcome of a single readiness check: the
+// component state flag plus the condition, reason, message, phase, and requeue
+// hint to publish. It is a candidate for extraction into its own package.
+type StateInformationDto struct {
+	State     pgcConstants.State
+	Condition conditionTypes
+	Reason    conditionReasons
+	Message   string
+	Phase     reconcileClusterPhases
+	Result    ctrl.Result
+}
+
+// clusterReadynessCheck is a single, self-contained unit of readiness evaluation
+// for one cluster component.
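+// Condition inspects the component and returns a StateInformationDto; a non-nil
+// error marks the component failed, while a pending state instructs the caller to
+// publish the supplied condition and requeue using the embedded Result.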
+type clusterReadynessCheck interface { + Condition(ctx context.Context) (StateInformationDto, error) +} + +type provisionerHealthCheck struct { + cluster *enterprisev4.PostgresCluster + cnpgCluster *cnpgv1.Cluster +} + +func newProvisionerHealthCheck(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) *provisionerHealthCheck { + return &provisionerHealthCheck{cluster: cluster, cnpgCluster: cnpgCluster} +} + +func (c *provisionerHealthCheck) Condition(_ context.Context) (StateInformationDto, error) { + if c.cnpgCluster != nil { + c.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ + APIVersion: "postgresql.cnpg.io/v1", + Kind: "Cluster", + Namespace: c.cnpgCluster.Namespace, + Name: c.cnpgCluster.Name, + UID: c.cnpgCluster.UID, } } - logger.Info("Reconciliation complete") - return ctrl.Result{}, nil + + info := StateInformationDto{Condition: clusterReady} + + if c.cnpgCluster == nil { + info.State = pgcConstants.ProvisionerPending + info.Reason = reasonCNPGProvisioning + info.Message = msgCNPGPendingCreation + info.Phase = pendingClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + } + + switch c.cnpgCluster.Status.Phase { + case cnpgv1.PhaseHealthy: + info.State = pgcConstants.ProvisionerReady + info.Reason = reasonCNPGClusterHealthy + info.Message = msgProvisionerHealthy + info.Phase = readyClusterPhase + return info, nil + case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: + info.State = pgcConstants.ProvisionerProvisioning + info.Reason = reasonCNPGProvisioning + info.Message = fmt.Sprintf(msgFmtCNPGProvisioning, c.cnpgCluster.Status.Phase) + info.Phase = provisioningClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseSwitchover: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGSwitchover + info.Message = msgCNPGSwitchover + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseFailOver: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGFailingOver + info.Message = msgCNPGFailingOver + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGRestarting + info.Message = fmt.Sprintf(msgFmtCNPGRestarting, c.cnpgCluster.Status.Phase) + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGUpgrading + info.Message = fmt.Sprintf(msgFmtCNPGUpgrading, c.cnpgCluster.Status.Phase) + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseApplyingConfiguration: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGApplyingConfig + info.Message = msgCNPGApplyingConfiguration + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseReplicaClusterPromotion: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGPromoting + info.Message = msgCNPGPromoting + info.Phase = 
configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseWaitingForUser: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGWaitingForUser + info.Message = msgCNPGWaitingForUser + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner requires user action") + case cnpgv1.PhaseUnrecoverable: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGUnrecoverable + info.Message = msgCNPGUnrecoverable + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner unrecoverable") + case cnpgv1.PhaseCannotCreateClusterObjects: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGProvisioningFailed + info.Message = msgCNPGCannotCreateObjects + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner cannot create cluster objects") + case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGPluginError + info.Message = fmt.Sprintf(msgFmtCNPGPluginError, c.cnpgCluster.Status.Phase) + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner plugin error") + case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGImageError + info.Message = fmt.Sprintf(msgFmtCNPGImageError, c.cnpgCluster.Status.Phase) + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner image error") + case "": + info.State = pgcConstants.ProvisionerPending + info.Reason = reasonCNPGProvisioning + info.Message = msgCNPGPendingCreation + info.Phase = pendingClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + default: + info.State = pgcConstants.ProvisionerProvisioning + info.Reason = reasonCNPGProvisioning + info.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, c.cnpgCluster.Status.Phase) + info.Phase = provisioningClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + } +} + +type poolerHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster + poolerEnabled bool + poolerConfigPresent bool +} + +func newPoolerHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster, poolerEnabled bool, poolerConfigPresent bool) *poolerHealthCheck { + return &poolerHealthCheck{ + client: c, + cluster: cluster, + poolerEnabled: poolerEnabled, + poolerConfigPresent: poolerConfigPresent, + } +} + +func (p *poolerHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if !p.poolerEnabled { + return StateInformationDto{ + State: pgcConstants.PoolerReady, + Condition: poolerReady, + Reason: reasonAllInstancesReady, + Message: msgPoolerDisabled, + Phase: readyClusterPhase, + }, nil + } + if !p.poolerConfigPresent { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerConfigMissing, + Message: msgPoolerConfigMissing, + Phase: failedClusterPhase, + }, fmt.Errorf("pooler config missing") + } + + // TODO: Port material. 
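+	// Ported from the previous in-line pooler status sync: confirm both poolers
+	// exist, fetch the RW and RO Pooler objects, and require arePoolersReady before
+	// reporting PoolerReady; anything earlier is reported as pending or provisioning.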
+ rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) + if err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerReconciliationFailed, + Message: fmt.Sprintf("Failed to check RW pooler existence: %v", err), + Phase: failedClusterPhase, + }, err + } + roExists, err := poolerExists(ctx, p.client, p.cluster, readOnlyEndpoint) + if err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerReconciliationFailed, + Message: fmt.Sprintf("Failed to check RO pooler existence: %v", err), + Phase: failedClusterPhase, + }, err + } + if !rwExists || !roExists { + return StateInformationDto{ + State: pgcConstants.PoolerProvisioning, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgPoolersProvisioning, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + rwPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readWriteEndpoint), + Namespace: p.cluster.Namespace, + }, rwPooler); err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgWaitRWPoolerObject, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + roPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readOnlyEndpoint), + Namespace: p.cluster.Namespace, + }, roPooler); err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgWaitROPoolerObject, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + if !arePoolersReady(rwPooler, roPooler) { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgPoolersNotReady, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + return StateInformationDto{ + State: pgcConstants.PoolerReady, + Condition: poolerReady, + Reason: reasonAllInstancesReady, + Message: msgPoolersReady, + Phase: readyClusterPhase, + }, nil +} + +type configMapHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster +} + +func newConfigMapHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *configMapHealthCheck { + return &configMapHealthCheck{client: c, cluster: cluster} +} + +func (c *configMapHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { + return StateInformationDto{ + State: pgcConstants.ConfigMapProvisioning, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: msgConfigMapRefNotPublished, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + cm := &corev1.ConfigMap{} + key := types.NamespacedName{Name: c.cluster.Status.Resources.ConfigMapRef.Name, Namespace: c.cluster.Namespace} + if err := c.client.Get(ctx, key, cm); err != nil { + if apierrors.IsNotFound(err) { + return StateInformationDto{ + State: pgcConstants.ConfigMapProvisioning, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: msgConfigMapNotFoundYet, + Phase: 
provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + return StateInformationDto{ + State: pgcConstants.ConfigMapFailed, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: fmt.Sprintf("Failed to fetch ConfigMap: %v", err), + Phase: failedClusterPhase, + }, err + } + + requiredKeys := []string{ + configKeyClusterRWEndpoint, + configKeyClusterROEndpoint, + configKeyDefaultClusterPort, + configKeySuperUserSecretRef, + } + for _, requiredKey := range requiredKeys { + if _, ok := cm.Data[requiredKey]; !ok { + return StateInformationDto{ + State: pgcConstants.ConfigMapFailed, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: fmt.Sprintf(msgFmtConfigMapMissingRequiredKey, requiredKey), + Phase: failedClusterPhase, + }, fmt.Errorf("configmap missing key %s", requiredKey) + } + } + + return StateInformationDto{ + State: pgcConstants.ConfigMapReady, + Condition: clusterReady, + Reason: reasonClusterBuildSucceeded, + Message: msgAccessConfigMapReady, + Phase: readyClusterPhase, + }, nil +} + +type secretHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster +} + +func newSecretHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *secretHealthCheck { + return &secretHealthCheck{client: c, cluster: cluster} +} + +func (s *secretHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { + return StateInformationDto{ + State: pgcConstants.SecretProvisioning, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: msgSecretRefNotPublished, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + secret := &corev1.Secret{} + key := types.NamespacedName{Name: s.cluster.Status.Resources.SuperUserSecretRef.Name, Namespace: s.cluster.Namespace} + if err := s.client.Get(ctx, key, secret); err != nil { + if apierrors.IsNotFound(err) { + return StateInformationDto{ + State: pgcConstants.SecretProvisioning, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: msgSecretNotFoundYet, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + return StateInformationDto{ + State: pgcConstants.SecretFailed, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: fmt.Sprintf("Failed to fetch superuser secret: %v", err), + Phase: failedClusterPhase, + }, err + } + + refKey := s.cluster.Status.Resources.SuperUserSecretRef.Key + if refKey == "" { + refKey = secretKeyPassword + } + if _, ok := secret.Data[refKey]; !ok { + return StateInformationDto{ + State: pgcConstants.SecretFailed, + Condition: clusterReady, + Reason: reasonSuperUserSecretFailed, + Message: fmt.Sprintf(msgFmtSecretMissingKey, refKey), + Phase: failedClusterPhase, + }, fmt.Errorf("secret missing key %s", refKey) + } + + return StateInformationDto{ + State: pgcConstants.SecretReady, + Condition: clusterReady, + Reason: reasonClusterBuildSucceeded, + Message: msgSuperuserSecretReady, + Phase: readyClusterPhase, + }, nil +} + +func isPendingState(state pgcConstants.State) bool { + switch state { + case pgcConstants.PoolerPending, + pgcConstants.PoolerProvisioning, + pgcConstants.PoolerConfiguring, + pgcConstants.ProvisionerPending, + pgcConstants.ProvisionerProvisioning, + pgcConstants.ProvisionerConfiguring, + pgcConstants.ConfigMapPending, + pgcConstants.ConfigMapProvisioning, + 
pgcConstants.ConfigMapConfiguring, + pgcConstants.SecretPending, + pgcConstants.SecretProvisioning, + pgcConstants.SecretConfiguring: + return true + default: + return false + } } // getMergedConfig overlays PostgresCluster spec on top of the class defaults. @@ -606,7 +1038,6 @@ func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterp Name: role.Name, Ensure: cnpgv1.EnsureAbsent, } - // Exists bool replaces the old Ensure string enum ("present"/"absent"). if role.Exists { r.Ensure = cnpgv1.EnsurePresent r.Login = true @@ -782,63 +1213,6 @@ func syncPoolerStatus(ctx context.Context, c client.Client, cluster *enterprisev readyClusterPhase) } -// syncStatus maps CNPG Cluster state to PostgresCluster status. -func syncStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { - cluster.Status.ProvisionerRef = &corev1.ObjectReference{ - APIVersion: "postgresql.cnpg.io/v1", - Kind: "Cluster", - Namespace: cnpgCluster.Namespace, - Name: cnpgCluster.Name, - UID: cnpgCluster.UID, - } - - var phase reconcileClusterPhases - var condStatus metav1.ConditionStatus - var reason conditionReasons - var message string - - switch cnpgCluster.Status.Phase { - case cnpgv1.PhaseHealthy: - phase, condStatus, reason, message = readyClusterPhase, metav1.ConditionTrue, reasonCNPGClusterHealthy, "Cluster is up and running" - case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster provisioning: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseSwitchover: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGSwitchover, "Cluster changing primary node" - case cnpgv1.PhaseFailOver: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGFailingOver, "Pod missing, need to change primary" - case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGRestarting - message = fmt.Sprintf("CNPG cluster restarting: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGUpgrading - message = fmt.Sprintf("CNPG cluster upgrading: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseApplyingConfiguration: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGApplyingConfig, "Configuration change is being applied" - case cnpgv1.PhaseReplicaClusterPromotion: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGPromoting, "Replica is being promoted to primary" - case cnpgv1.PhaseWaitingForUser: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGWaitingForUser, "Action from the user is required" - case cnpgv1.PhaseUnrecoverable: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGUnrecoverable, "Cluster failed, needs manual intervention" - case cnpgv1.PhaseCannotCreateClusterObjects: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioningFailed, "Cluster resources cannot be created" - case 
cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGPluginError - message = fmt.Sprintf("CNPG plugin error: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGImageError - message = fmt.Sprintf("CNPG image error: %s", cnpgCluster.Status.Phase) - case "": - phase, condStatus, reason, message = pendingClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning, "CNPG cluster is pending creation" - default: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster phase: %s", cnpgCluster.Status.Phase) - } - - return setStatus(ctx, c, cluster, clusterReady, condStatus, reason, message, phase) -} - // setStatus sets the phase, condition and persists the status. // It skips the API write when the resulting status is identical to the current // state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters. diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index e2466f54b..57ff04daa 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -6,6 +6,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -13,11 +14,23 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/utils/ptr" client "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +type configMapNotFoundClient struct { + client.Client +} + +func (c configMapNotFoundClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*corev1.ConfigMap); ok { + return apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, key.Name) + } + return c.Client.Get(ctx, key, obj, opts...) 
+} + func TestPoolerResourceName(t *testing.T) { tests := []struct { name string @@ -1136,3 +1149,125 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { assert.Equal(t, int32(1), *ro.Spec.Instances) }) } + +func TestComponentStateTriggerConditions(t *testing.T) { + t.Parallel() + + ctx := t.Context() + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + + exampleCm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-config", + Namespace: "default", + }, + Data: map[string]string{ + "CLUSTER_RW_ENDPOINT": "pg1-rw.default", + "CLUSTER_RO_ENDPOINT": "pg1-ro.default", + "DEFAULT_CLUSTER_PORT": "5432", + "SUPER_USER_SECRET_REF": "pg1-secret", + }, + } + examplePgCluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: "default", + }, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{ + ConfigMapRef: &corev1.LocalObjectReference{Name: "pg1-config"}, + SuperUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "pg1-secret"}, + Key: "password", + }, + }, + }, + } + exampleSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-secret", + Namespace: "default", + }, + Data: map[string][]byte{ + "password": []byte("s3cr3t"), + }, + } + + // TODO: as soon as coupling is addressed, remove this monster of a test. + combinations := []struct { + name string + componentChecks []clusterReadynessCheck + requeue []bool + expectedResult bool + message string + }{ + { + name: "Provisioner ready, pooler pending, sync not successful", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, true, false), + }, + requeue: []bool{false, false}, + expectedResult: false, + message: "Provisioner is ready but pooler is pending, don't fire", + }, + { + name: "Provisioner ready, pooler ready, configMap failed, sync not successful", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, false, false), + newConfigMapHealthCheck( + configMapNotFoundClient{ + Client: fake.NewClientBuilder(). + WithScheme(scheme). + Build(), + }, + examplePgCluster.DeepCopy(), + ), + }, + requeue: []bool{false, false, true}, + expectedResult: false, + message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", + }, + { + name: "Sync successful, all components ready.", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, false, false), + newConfigMapHealthCheck( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleCm). + Build(), + examplePgCluster.DeepCopy(), + ), + newSecretHealthCheck( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleSecret). 
+ Build(), + examplePgCluster.DeepCopy(), + ), + }, + requeue: []bool{false, false, false, false}, + expectedResult: true, + message: "", + }, + } + + for _, tt := range combinations { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + state := pgcConstants.EmptyState + for i, check := range tt.componentChecks { + info, _ := check.Condition(ctx) + state |= info.State + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + } + assert.Equal(t, tt.expectedResult, state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady, + tt.message) + }) + } +} diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 042a5ae82..99ea5bcb1 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -42,6 +42,7 @@ type MergedConfig struct { type reconcileClusterPhases string type conditionTypes string type conditionReasons string +type statusMessage = string type objectKind string const ( @@ -50,9 +51,17 @@ const ( readOnlyEndpoint string = "ro" readWriteEndpoint string = "rw" - defaultDatabaseName string = "postgres" - superUsername string = "postgres" - defaultPort string = "5432" + defaultDatabaseName string = "postgres" + superUsername string = "postgres" + defaultPort string = "5432" + configKeyClusterRWEndpoint string = "CLUSTER_RW_ENDPOINT" + configKeyClusterROEndpoint string = "CLUSTER_RO_ENDPOINT" + configKeyClusterREndpoint string = "CLUSTER_R_ENDPOINT" + configKeyDefaultClusterPort string = "DEFAULT_CLUSTER_PORT" + configKeySuperUserName string = "SUPER_USER_NAME" + configKeySuperUserSecretRef string = "SUPER_USER_SECRET_REF" + configKeyPoolerRWEndpoint string = "CLUSTER_POOLER_RW_ENDPOINT" + configKeyPoolerROEndpoint string = "CLUSTER_POOLER_RO_ENDPOINT" secretKeyPassword string = "password" defaultSecretSuffix string = "-secret" @@ -111,4 +120,39 @@ const ( reasonCNPGProvisioningFailed conditionReasons = "CNPGProvisioningFailed" reasonCNPGPluginError conditionReasons = "CNPGPluginError" reasonCNPGImageError conditionReasons = "CNPGImageError" + + // status messages — provisioner health check + msgProvisionerHealthy statusMessage = "Provisioner cluster is healthy" + msgCNPGPendingCreation statusMessage = "CNPG cluster is pending creation" + msgFmtCNPGProvisioning statusMessage = "CNPG cluster provisioning: %s" + msgCNPGSwitchover statusMessage = "Cluster changing primary node" + msgCNPGFailingOver statusMessage = "Pod missing, need to change primary" + msgFmtCNPGRestarting statusMessage = "CNPG cluster restarting: %s" + msgFmtCNPGUpgrading statusMessage = "CNPG cluster upgrading: %s" + msgCNPGApplyingConfiguration statusMessage = "Configuration change is being applied" + msgCNPGPromoting statusMessage = "Replica is being promoted to primary" + msgCNPGWaitingForUser statusMessage = "Action from the user is required" + msgCNPGUnrecoverable statusMessage = "Cluster failed, needs manual intervention" + msgCNPGCannotCreateObjects statusMessage = "Cluster resources cannot be created" + msgFmtCNPGPluginError statusMessage = "CNPG plugin error: %s" + msgFmtCNPGImageError statusMessage = "CNPG image error: %s" + msgFmtCNPGClusterPhase statusMessage = "CNPG cluster phase: %s" + + // status messages — aggregate and component readiness checks + msgAllComponentsReady statusMessage = "All components are ready" + msgPoolerDisabled statusMessage = "Connection pooler disabled" + msgPoolerConfigMissing statusMessage = "Connection pooler enabled but configuration is missing" + msgPoolersProvisioning statusMessage = 
"Connection poolers are being provisioned" + msgWaitRWPoolerObject statusMessage = "Waiting for RW pooler object" + msgWaitROPoolerObject statusMessage = "Waiting for RO pooler object" + msgPoolersNotReady statusMessage = "Connection poolers are not ready yet" + msgPoolersReady statusMessage = "Connection poolers are ready" + msgConfigMapRefNotPublished statusMessage = "ConfigMap reference not published yet" + msgConfigMapNotFoundYet statusMessage = "ConfigMap not found yet" + msgFmtConfigMapMissingRequiredKey statusMessage = "ConfigMap missing required key %q" + msgAccessConfigMapReady statusMessage = "Access ConfigMap is ready" + msgSecretRefNotPublished statusMessage = "Superuser secret reference not published yet" + msgSecretNotFoundYet statusMessage = "Superuser secret not found yet" + msgFmtSecretMissingKey statusMessage = "Superuser secret missing key %q" + msgSuperuserSecretReady statusMessage = "Superuser secret is ready" ) diff --git a/pkg/postgresql/cluster/core/types/constants/state.go b/pkg/postgresql/cluster/core/types/constants/state.go new file mode 100644 index 000000000..bf19698c6 --- /dev/null +++ b/pkg/postgresql/cluster/core/types/constants/state.go @@ -0,0 +1,41 @@ +package pgcConstants + +type State uint64 + +const ( + EmptyState State = 0 + PoolerReady State = 1 << iota + PoolerPending + PoolerProvisioning + PoolerConfiguring + PoolerFailed + + ProvisionerReady + ProvisionerPending + ProvisionerProvisioning + ProvisionerConfiguring + ProvisionerFailed + + ConfigMapReady + ConfigMapPending + ConfigMapProvisioning + ConfigMapConfiguring + ConfigMapFailed + + SecretReady + SecretPending + SecretProvisioning + SecretConfiguring + SecretFailed + + ClusterReady + ClusterPending + ClusterProvisioning + ClusterConfiguring + ClusterFailed +) + +const ( + ComponentsReady = PoolerReady | ProvisionerReady | SecretReady | ConfigMapReady + OwnershipReady +) diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index f84a35fd9..3a88bac80 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -172,31 +172,29 @@ func PostgresDatabaseService( } // Phase: RoleProvisioning - desiredUsers := getDesiredUsers(postgresDB) - actualRoles := getUsersInClusterSpec(cluster) - var missing []string - for _, role := range desiredUsers { - if !slices.Contains(actualRoles, role) { - missing = append(missing, role) - } - } - - if len(missing) > 0 { - logger.Info("CNPG Cluster patch started, missing roles detected", "missing", missing) - if err := patchManagedRoles(ctx, c, postgresDB, cluster); err != nil { + fieldManager := fieldManagerName(postgresDB.Name) + desired := buildDesiredRoles(postgresDB.Name, postgresDB.Spec.Databases) + rolesToAdd := findAddedRoleNames(cluster, desired) + rolesToRemove := absentRolesByName(findRemovedRoleNames(cluster, fieldManager, desired)) + allRoles := append(desired, rolesToRemove...) 
+ + if len(rolesToAdd) > 0 || len(rolesToRemove) > 0 { + logger.Info("CNPG Cluster patch started, role drift detected", "toAdd", len(rolesToAdd), "toRemove", len(rolesToRemove)) + if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { logger.Error(err, "Failed to patch users in CNPG Cluster") rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err)) return ctrl.Result{}, err } - rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles, waiting for %d roles to reconcile", len(desiredUsers))) + rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove))) if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, - fmt.Sprintf("Waiting for %d roles to be reconciled", len(desiredUsers)), provisioningDBPhase); err != nil { + fmt.Sprintf("Waiting for roles to be reconciled: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil { return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil } - notReadyRoles, err := verifyRolesReady(ctx, desiredUsers, cnpgCluster) + roleNames := getDesiredUsers(postgresDB) + notReadyRoles, err := verifyRolesReady(ctx, roleNames, cnpgCluster) if err != nil { rc.emitWarning(postgresDB, EventRoleFailed, fmt.Sprintf("Role reconciliation failed: %v", err)) if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, @@ -212,9 +210,9 @@ func PostgresDatabaseService( } return ctrl.Result{RequeueAfter: retryDelay}, nil } - rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("All %d roles reconciled", len(desiredUsers))) + rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("Roles reconciled: %d active, %d removed", len(rolesToAdd), len(rolesToRemove))) if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable, - fmt.Sprintf("All %d users in PostgreSQL", len(desiredUsers)), provisioningDBPhase); err != nil { + fmt.Sprintf("Roles reconciled: %d active, %d removed", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil { return ctrl.Result{}, err } @@ -366,6 +364,9 @@ func getUsersInClusterSpec(cluster *enterprisev4.PostgresCluster) []string { return users } +// rolesMatchClusterSpec returns true if desired and actual contain the same roles +// (by name and Exists state), regardless of order. 
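To summarize the drift handling above in one place: the PostgresDatabase applies its roles under a dedicated field manager (the unit tests below pair the manager name "splunk-operator-primary" with a resource named "primary"), and any role that manager still owns in the cluster spec but no longer desires is re-applied with Exists set to false, so CNPG drops it instead of the operator silently abandoning it. A condensed sketch of one reconcile pass, assuming the helpers behave as declared in this patch:

    fieldManager := fieldManagerName(postgresDB.Name)
    desired := buildDesiredRoles(postgresDB.Name, postgresDB.Spec.Databases)
    added := findAddedRoleNames(cluster, desired)                   // missing, or present but marked Exists:false
    removed := findRemovedRoleNames(cluster, fieldManager, desired) // owned by this manager, no longer desired
    if len(added) > 0 || len(removed) > 0 {
        apply := append(desired, absentRolesByName(removed)...) // Exists:false entries for dropped roles
        if err := patchManagedRoles(ctx, c, fieldManager, cluster, apply); err != nil {
            return ctrl.Result{}, err
        }
    }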
+ func getRoleConflicts(postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) []string { myManager := fieldManagerName(postgresDB.Name) desired := make(map[string]struct{}, len(postgresDB.Spec.Databases)*2) @@ -413,18 +414,16 @@ func parseRoleNames(raw []byte) []string { return names } -func patchManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error { +func patchManagedRoles(ctx context.Context, c client.Client, fieldManager string, cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole) error { logger := log.FromContext(ctx) - allRoles := buildManagedRoles(postgresDB.Name, postgresDB.Spec.Databases) - rolePatch, err := buildManagedRolesPatch(cluster, allRoles, c.Scheme()) + rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) if err != nil { - return fmt.Errorf("building managed roles patch for PostgresDatabase %s: %w", postgresDB.Name, err) + return fmt.Errorf("building managed roles patch: %w", err) } - fieldManager := fieldManagerName(postgresDB.Name) if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { - return fmt.Errorf("patching managed roles for PostgresDatabase %s: %w", postgresDB.Name, err) + return fmt.Errorf("patching managed roles: %w", err) } - logger.Info("Users added to PostgresCluster via SSA", "roleCount", len(allRoles)) + logger.Info("Managed roles patched", "count", len(roles)) return nil } @@ -580,7 +579,15 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter logger.Info("PostgresCluster already deleted, skipping role cleanup") return nil } - return patchManagedRolesOnDeletion(ctx, c, postgresDB, cluster, plan.retained) + fieldManager := fieldManagerName(postgresDB.Name) + retainedRoles := buildDesiredRoles(postgresDB.Name, plan.retained) + rolesToRemove := buildRolesToRemove(plan.deleted) + allRoles := append(retainedRoles, rolesToRemove...) + if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { + return err + } + logger.Info("Managed roles patched on deletion", "retained", len(retainedRoles), "removed", len(rolesToRemove)) + return nil } func orphanCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error { @@ -716,7 +723,67 @@ func deleteSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev return nil } -func buildManagedRoles(postgresDBName string, databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { +// buildRolesToRemove produces Exists:false entries for the given databases so CNPG drops their roles. +func buildRolesToRemove(databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { + roles := make([]enterprisev4.ManagedRole, 0, len(databases)*2) + for _, dbSpec := range databases { + roles = append(roles, + enterprisev4.ManagedRole{Name: adminRoleName(dbSpec.Name), Exists: false}, + enterprisev4.ManagedRole{Name: rwRoleName(dbSpec.Name), Exists: false}, + ) + } + return roles +} + +// absentRolesByName produces Exists:false entries from a list of raw role names. +// Used by the normal reconcile path where names come from SSA field manager parsing. 
+func absentRolesByName(names []string) []enterprisev4.ManagedRole { + roles := make([]enterprisev4.ManagedRole, 0, len(names)) + for _, name := range names { + roles = append(roles, enterprisev4.ManagedRole{Name: name, Exists: false}) + } + return roles +} + +// findAddedRoleNames returns role names from the desired list that are missing +// from the cluster spec or currently marked absent. +func findAddedRoleNames(cluster *enterprisev4.PostgresCluster, desired []enterprisev4.ManagedRole) []string { + current := make(map[string]bool, len(cluster.Spec.ManagedRoles)) + for _, r := range cluster.Spec.ManagedRoles { + current[r.Name] = r.Exists + } + var toAdd []string + for _, r := range desired { + exists, found := current[r.Name] + if !found || !exists { + toAdd = append(toAdd, r.Name) + } + } + return toAdd +} + +// findRemovedRoleNames returns role names currently owned by this field manager +// in the cluster spec that are absent from the desired list. +func findRemovedRoleNames(cluster *enterprisev4.PostgresCluster, manager string, desired []enterprisev4.ManagedRole) []string { + desiredSet := make(map[string]struct{}, len(desired)) + for _, r := range desired { + desiredSet[r.Name] = struct{}{} + } + owners := managedRoleOwners(cluster.ManagedFields) + var toRemove []string + for name, owner := range owners { + if owner == manager { + if _, ok := desiredSet[name]; !ok { + toRemove = append(toRemove, name) + } + } + } + return toRemove +} + +// buildDesiredRoles builds the full set of roles that should be present for the given databases. +// This is the input to findAddedRoleNames and findRemovedRoleNames. +func buildDesiredRoles(postgresDBName string, databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { roles := make([]enterprisev4.ManagedRole, 0, len(databases)*2) for _, dbSpec := range databases { roles = append(roles, @@ -752,20 +819,6 @@ func buildManagedRolesPatch(cluster *enterprisev4.PostgresCluster, roles []enter }, nil } -func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error { - logger := log.FromContext(ctx) - roles := buildManagedRoles(postgresDB.Name, retained) - rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) - if err != nil { - return fmt.Errorf("building managed roles patch: %w", err) - } - if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManagerName(postgresDB.Name))); err != nil { - return fmt.Errorf("patching managed roles on deletion: %w", err) - } - logger.Info("Managed roles patched on deletion", "retainedRoles", len(roles)) - return nil -} - func stripOwnerReference(obj metav1.Object, ownerUID types.UID) { refs := obj.GetOwnerReferences() filtered := make([]metav1.OwnerReference, 0, len(refs)) diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go index 8d4da6c52..0e8bee12b 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -306,7 +306,7 @@ func TestVerifyRolesReady(t *testing.T) { }, }, }, - wantErr: "user main_db_rw reconciliation failed: [reserved role]", + wantErr: "reconciling user main_db_rw: [reserved role]", }, { name: "returns missing roles that are not reconciled yet", @@ -1283,7 +1283,7 @@ func TestBuildManagedRoles(t *testing.T) { }, } - got := buildManagedRoles("primary", databases) + got := 
buildDesiredRoles("primary", databases) assert.Equal(t, want, got) } @@ -1300,7 +1300,7 @@ func TestBuildManagedRolesPatch(t *testing.T) { Namespace: "dbs", }, } - roles := buildManagedRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) + roles := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) c := testClient(t, scheme, cluster) got, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) @@ -1312,37 +1312,152 @@ func TestBuildManagedRolesPatch(t *testing.T) { assert.Equal(t, map[string]any{"managedRoles": roles}, got.Object["spec"]) } -func TestPatchManagedRolesOnDeletion(t *testing.T) { - scheme := testScheme(t) - postgresDB := &enterprisev4.PostgresDatabase{ - ObjectMeta: metav1.ObjectMeta{ - Name: "primary", - Namespace: "dbs", +func TestFindAddedRoleNames(t *testing.T) { + desired := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}, {Name: "api"}}) + + tests := []struct { + name string + current []enterprisev4.ManagedRole + want []string + }{ + { + name: "all missing from cluster", + current: nil, + want: []string{"payments_admin", "payments_rw", "api_admin", "api_rw"}, + }, + { + name: "some already present", + current: []enterprisev4.ManagedRole{ + {Name: "payments_admin", Exists: true}, + {Name: "payments_rw", Exists: true}, + }, + want: []string{"api_admin", "api_rw"}, + }, + { + name: "role present but marked absent — should be re-added", + current: []enterprisev4.ManagedRole{ + {Name: "payments_admin", Exists: false}, + {Name: "payments_rw", Exists: true}, + }, + want: []string{"payments_admin", "api_admin", "api_rw"}, + }, + { + name: "all already present", + current: []enterprisev4.ManagedRole{ + {Name: "payments_admin", Exists: true}, + {Name: "payments_rw", Exists: true}, + {Name: "api_admin", Exists: true}, + {Name: "api_rw", Exists: true}, + }, + want: nil, }, } - cluster := &enterprisev4.PostgresCluster{ - TypeMeta: metav1.TypeMeta{ - APIVersion: enterprisev4.GroupVersion.String(), - Kind: "PostgresCluster", + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ManagedRoles: tc.current}, + } + got := findAddedRoleNames(cluster, desired) + assert.ElementsMatch(t, tc.want, got) + }) + } +} + +type roleFieldOwner struct { + manager string + roles []string +} + +func TestFindRemovedRoleNames(t *testing.T) { + manager := "splunk-operator-primary" + desired := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) + + tests := []struct { + name string + fieldOwners []roleFieldOwner + want []string + }{ + { + name: "no roles owned by any manager", + fieldOwners: nil, + want: nil, }, - ObjectMeta: metav1.ObjectMeta{ - Name: "primary", - Namespace: "dbs", + { + name: "owned roles still in desired — nothing to remove", + fieldOwners: []roleFieldOwner{{manager: manager, roles: []string{"payments_admin", "payments_rw"}}}, + want: nil, + }, + { + name: "owned role no longer in desired — should be removed", + fieldOwners: []roleFieldOwner{{manager: manager, roles: []string{"payments_admin", "payments_rw", "api_admin", "api_rw"}}}, + want: []string{"api_admin", "api_rw"}, + }, + { + name: "role owned by different manager — ignored", + fieldOwners: []roleFieldOwner{{manager: "other-manager", roles: []string{"api_admin", "api_rw"}}}, + want: nil, }, } - retained := []enterprisev4.DatabaseDefinition{{Name: "payments"}} - want := buildManagedRoles(postgresDB.Name, retained) 
- c := testClient(t, scheme, cluster) - err := patchManagedRolesOnDeletion(context.Background(), c, postgresDB, cluster, retained) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + var managedFields []metav1.ManagedFieldsEntry + for _, fo := range tc.fieldOwners { + keys := make([]string, len(fo.roles)) + for i, r := range fo.roles { + keys[i] = `k:{"name":"` + r + `"}` + } + managedFields = append(managedFields, metav1.ManagedFieldsEntry{ + Manager: fo.manager, + FieldsV1: &metav1.FieldsV1{Raw: managedRolesFieldsRaw(t, keys...)}, + Operation: metav1.ManagedFieldsOperationApply, + }) + } + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ManagedFields: managedFields}, + } + got := findRemovedRoleNames(cluster, manager, desired) + assert.ElementsMatch(t, tc.want, got) + }) + } +} + - require.NoError(t, err) +func TestBuildRolesToRemove(t *testing.T) { + tests := []struct { + name string + deleted []enterprisev4.DatabaseDefinition + want []enterprisev4.ManagedRole + }{ + { + name: "nothing to remove", + deleted: nil, + want: []enterprisev4.ManagedRole{}, + }, + { + name: "single database removed", + deleted: []enterprisev4.DatabaseDefinition{{Name: "api"}}, + want: []enterprisev4.ManagedRole{{Name: "api_admin", Exists: false}, {Name: "api_rw", Exists: false}}, + }, + { + name: "multiple databases removed", + deleted: []enterprisev4.DatabaseDefinition{{Name: "api"}, {Name: "payments"}}, + want: []enterprisev4.ManagedRole{ + {Name: "api_admin", Exists: false}, {Name: "api_rw", Exists: false}, + {Name: "payments_admin", Exists: false}, {Name: "payments_rw", Exists: false}, + }, + }, + } - got := &enterprisev4.PostgresCluster{} - require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, got)) - assert.Equal(t, want, got.Spec.ManagedRoles) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, buildRolesToRemove(tc.deleted)) + }) + } } + func TestStripOwnerReference(t *testing.T) { obj := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index bf07fd19f..fb57dee91 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -31,7 +31,8 @@ const ( readWriteEndpoint string = "rw" deletionPolicyRetain string = "Retain" - + deletionPolicyDelete string = "Delete" + postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer" annotationRetainedFrom string = "enterprise.splunk.com/retained-from" From bf25dc8a68e12e155bb1c92d04af072ef129ced0 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Fri, 10 Apr 2026 08:50:37 +0200 Subject: [PATCH 02/10] logging changed, pureness fix attempt --- pkg/postgresql/cluster/core/cluster.go | 609 +++++++++--------- .../cluster/core/cluster_unit_test.go | 62 +- .../core/types/constants/components.go | 8 + .../cluster/core/types/constants/state.go | 49 +- 4 files changed, 360 insertions(+), 368 deletions(-) create mode 100644 pkg/postgresql/cluster/core/types/constants/components.go diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 81a928595..207096b97 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -447,58 +447,56 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } } - // Aggregate component readiness from iterative health checks. 
- state := pgcConstants.EmptyState - conditions := []clusterReadynessCheck{ - newProvisionerHealthCheck(postgresCluster, cnpgCluster), - newPoolerHealthCheck(c, postgresCluster, poolerEnabled, poolerConfigPresent), - newConfigMapHealthCheck(c, postgresCluster), - newSecretHealthCheck(c, postgresCluster), + // Aggregate model readiness from iterative health checks. + components := []componentModel{ + newProvisionerModel(postgresCluster, cnpgCluster), + newPoolerModel(c, postgresCluster, poolerEnabled, poolerConfigPresent), + newConfigMapModel(c, postgresCluster), + newSecretModel(c, postgresCluster), } - for _, check := range conditions { - componentHealth, err := check.Condition(ctx) + for _, component := range components { + health, err := component.Sync(ctx) if err != nil { - if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil { + logger.Error(err, "Component sync failed", + "component", component.Name(), + "requeueAfter", health.Result.RequeueAfter, + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase, + "state", health.State) + if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { if apierrors.IsConflict(statusErr) { return ctrl.Result{Requeue: true}, nil } return ctrl.Result{}, statusErr } - logger.Error(err, "Component health check reported issues", - "component", check, - "requeueAfter", componentHealth.Result.RequeueAfter, - "reason", componentHealth.Reason) - return componentHealth.Result, err + return health.Result, err } - if isPendingState(componentHealth.State) { - if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil { + if isPendingState(health.State) { + if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { if apierrors.IsConflict(statusErr) { return ctrl.Result{Requeue: true}, nil } return ctrl.Result{}, statusErr } - return componentHealth.Result, nil + return health.Result, nil } - state |= componentHealth.State } - if state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady { - logger.Info("Reconciliation complete") - if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, err + logger.Info("Reconciliation complete") + if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil } - return ctrl.Result{}, nil + return ctrl.Result{}, err } - return ctrl.Result{RequeueAfter: retryDelay}, nil + return ctrl.Result{}, nil } -// Free to place in specific dir/place along with the p&a work. -type StateInformationDto struct { +// types/dto candidate +type componentHealth struct { State pgcConstants.State Condition conditionTypes Reason conditionReasons @@ -507,154 +505,177 @@ type StateInformationDto struct { Result ctrl.Result } -// a unit of work in a way, extractable. 
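The loop above treats every component uniformly: a Sync error publishes ConditionFalse and aborts the pass, a pending State publishes ConditionFalse and requeues with the component's own Result, and only a ready component lets the loop continue. A minimal, hypothetical component that satisfies the componentModel interface defined just below (a sketch only, reusing constants declared elsewhere in this series):

    type exampleModel struct{ ready bool } // hypothetical, for illustration only

    func (e *exampleModel) Name() string { return "example" }

    func (e *exampleModel) Sync(_ context.Context) (componentHealth, error) {
        h := componentHealth{Condition: clusterReady}
        if !e.ready {
            h.State = pgcConstants.Pending // isPendingState(h.State) is true, so the caller requeues
            h.Reason = reasonCNPGProvisioning
            h.Message = "example component waiting"
            h.Phase = pendingClusterPhase
            h.Result = ctrl.Result{RequeueAfter: retryDelay}
            return h, nil
        }
        h.State = pgcConstants.Ready
        h.Reason = reasonCNPGClusterHealthy
        h.Message = "example component ready"
        h.Phase = readyClusterPhase
        return h, nil
    }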
-type clusterReadynessCheck interface { - Condition(ctx context.Context) (StateInformationDto, error) +type componentModel interface { + syncer + namer +} + +type syncer interface { + Sync(ctx context.Context) (componentHealth, error) +} + +type namer interface { + Name() string } -type provisionerHealthCheck struct { +type provisionerModel struct { cluster *enterprisev4.PostgresCluster cnpgCluster *cnpgv1.Cluster + + health componentHealth } -func newProvisionerHealthCheck(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) *provisionerHealthCheck { - return &provisionerHealthCheck{cluster: cluster, cnpgCluster: cnpgCluster} +// TODO: It's a model +// -> that can be addresed when a place for this obj is created in +// proj structure (future) +func newProvisionerModel(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) *provisionerModel { + return &provisionerModel{cluster: cluster, cnpgCluster: cnpgCluster} } -func (c *provisionerHealthCheck) Condition(_ context.Context) (StateInformationDto, error) { - if c.cnpgCluster != nil { - c.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ +func (p *provisionerModel) Name() string { return pgcConstants.ComponentProvisioner } + +func (p *provisionerModel) Sync(_ context.Context) (componentHealth, error) { + if p.cnpgCluster != nil { + p.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ APIVersion: "postgresql.cnpg.io/v1", Kind: "Cluster", - Namespace: c.cnpgCluster.Namespace, - Name: c.cnpgCluster.Name, - UID: c.cnpgCluster.UID, + Namespace: p.cnpgCluster.Namespace, + Name: p.cnpgCluster.Name, + UID: p.cnpgCluster.UID, } } - info := StateInformationDto{Condition: clusterReady} + p.health.Condition = clusterReady - if c.cnpgCluster == nil { - info.State = pgcConstants.ProvisionerPending - info.Reason = reasonCNPGProvisioning - info.Message = msgCNPGPendingCreation - info.Phase = pendingClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + if p.cnpgCluster == nil { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } - switch c.cnpgCluster.Status.Phase { + switch p.cnpgCluster.Status.Phase { case cnpgv1.PhaseHealthy: - info.State = pgcConstants.ProvisionerReady - info.Reason = reasonCNPGClusterHealthy - info.Message = msgProvisionerHealthy - info.Phase = readyClusterPhase - return info, nil + p.health.State = pgcConstants.Ready + p.health.Reason = reasonCNPGClusterHealthy + p.health.Message = msgProvisionerHealthy + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + return p.health, nil case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: - info.State = pgcConstants.ProvisionerProvisioning - info.Reason = reasonCNPGProvisioning - info.Message = fmt.Sprintf(msgFmtCNPGProvisioning, c.cnpgCluster.Status.Phase) - info.Phase = provisioningClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGProvisioning, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil case cnpgv1.PhaseSwitchover: - info.State = pgcConstants.ProvisionerConfiguring - info.Reason = 
reasonCNPGSwitchover - info.Message = msgCNPGSwitchover - info.Phase = configuringClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGSwitchover + p.health.Message = msgCNPGSwitchover + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil case cnpgv1.PhaseFailOver: - info.State = pgcConstants.ProvisionerConfiguring - info.Reason = reasonCNPGFailingOver - info.Message = msgCNPGFailingOver - info.Phase = configuringClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGFailingOver + p.health.Message = msgCNPGFailingOver + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: - info.State = pgcConstants.ProvisionerConfiguring - info.Reason = reasonCNPGRestarting - info.Message = fmt.Sprintf(msgFmtCNPGRestarting, c.cnpgCluster.Status.Phase) - info.Phase = configuringClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGRestarting + p.health.Message = fmt.Sprintf(msgFmtCNPGRestarting, p.cnpgCluster.Status.Phase) + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: - info.State = pgcConstants.ProvisionerConfiguring - info.Reason = reasonCNPGUpgrading - info.Message = fmt.Sprintf(msgFmtCNPGUpgrading, c.cnpgCluster.Status.Phase) - info.Phase = configuringClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGUpgrading + p.health.Message = fmt.Sprintf(msgFmtCNPGUpgrading, p.cnpgCluster.Status.Phase) + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil case cnpgv1.PhaseApplyingConfiguration: - info.State = pgcConstants.ProvisionerConfiguring - info.Reason = reasonCNPGApplyingConfig - info.Message = msgCNPGApplyingConfiguration - info.Phase = configuringClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGApplyingConfig + p.health.Message = msgCNPGApplyingConfiguration + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil case cnpgv1.PhaseReplicaClusterPromotion: - info.State = pgcConstants.ProvisionerConfiguring - info.Reason = reasonCNPGPromoting - info.Message = msgCNPGPromoting - info.Phase = configuringClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGPromoting + p.health.Message = msgCNPGPromoting + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil case cnpgv1.PhaseWaitingForUser: - info.State = pgcConstants.ProvisionerFailed - info.Reason = reasonCNPGWaitingForUser - info.Message = msgCNPGWaitingForUser - info.Phase = failedClusterPhase - return info, 
fmt.Errorf("provisioner requires user action") + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGWaitingForUser + p.health.Message = msgCNPGWaitingForUser + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner requires user action") case cnpgv1.PhaseUnrecoverable: - info.State = pgcConstants.ProvisionerFailed - info.Reason = reasonCNPGUnrecoverable - info.Message = msgCNPGUnrecoverable - info.Phase = failedClusterPhase - return info, fmt.Errorf("provisioner unrecoverable") + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGUnrecoverable + p.health.Message = msgCNPGUnrecoverable + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner unrecoverable") case cnpgv1.PhaseCannotCreateClusterObjects: - info.State = pgcConstants.ProvisionerFailed - info.Reason = reasonCNPGProvisioningFailed - info.Message = msgCNPGCannotCreateObjects - info.Phase = failedClusterPhase - return info, fmt.Errorf("provisioner cannot create cluster objects") + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGProvisioningFailed + p.health.Message = msgCNPGCannotCreateObjects + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner cannot create cluster objects") case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: - info.State = pgcConstants.ProvisionerFailed - info.Reason = reasonCNPGPluginError - info.Message = fmt.Sprintf(msgFmtCNPGPluginError, c.cnpgCluster.Status.Phase) - info.Phase = failedClusterPhase - return info, fmt.Errorf("provisioner plugin error") + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGPluginError + p.health.Message = fmt.Sprintf(msgFmtCNPGPluginError, p.cnpgCluster.Status.Phase) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner plugin error") case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: - info.State = pgcConstants.ProvisionerFailed - info.Reason = reasonCNPGImageError - info.Message = fmt.Sprintf(msgFmtCNPGImageError, c.cnpgCluster.Status.Phase) - info.Phase = failedClusterPhase - return info, fmt.Errorf("provisioner image error") + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGImageError + p.health.Message = fmt.Sprintf(msgFmtCNPGImageError, p.cnpgCluster.Status.Phase) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner image error") case "": - info.State = pgcConstants.ProvisionerPending - info.Reason = reasonCNPGProvisioning - info.Message = msgCNPGPendingCreation - info.Phase = pendingClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil default: - info.State = pgcConstants.ProvisionerProvisioning - info.Reason = reasonCNPGProvisioning - info.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, c.cnpgCluster.Status.Phase) - info.Phase = provisioningClusterPhase - info.Result = ctrl.Result{RequeueAfter: retryDelay} - return info, nil + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, 
p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } } -type poolerHealthCheck struct { +type poolerModel struct { client client.Client cluster *enterprisev4.PostgresCluster poolerEnabled bool poolerConfigPresent bool + + health componentHealth } -func newPoolerHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster, poolerEnabled bool, poolerConfigPresent bool) *poolerHealthCheck { - return &poolerHealthCheck{ +func newPoolerModel(c client.Client, cluster *enterprisev4.PostgresCluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { + return &poolerModel{ client: c, cluster: cluster, poolerEnabled: poolerEnabled, @@ -662,56 +683,54 @@ func newPoolerHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster } } -func (p *poolerHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { +func (p *poolerModel) Name() string { return pgcConstants.ComponentPooler } + +func (p *poolerModel) Sync(ctx context.Context) (componentHealth, error) { + p.health = componentHealth{Condition: poolerReady} + if !p.poolerEnabled { - return StateInformationDto{ - State: pgcConstants.PoolerReady, - Condition: poolerReady, - Reason: reasonAllInstancesReady, - Message: msgPoolerDisabled, - Phase: readyClusterPhase, - }, nil + p.health.State = pgcConstants.Ready + p.health.Reason = reasonAllInstancesReady + p.health.Message = msgPoolerDisabled + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + return p.health, nil } if !p.poolerConfigPresent { - return StateInformationDto{ - State: pgcConstants.PoolerFailed, - Condition: poolerReady, - Reason: reasonPoolerConfigMissing, - Message: msgPoolerConfigMissing, - Phase: failedClusterPhase, - }, fmt.Errorf("pooler config missing") + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerConfigMissing + p.health.Message = msgPoolerConfigMissing + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("pooler config missing") } // TODO: Port material. 
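Worth noting about the two early returns above: a disabled pooler deliberately reports Ready so it never blocks the aggregate condition, and both branches return before the client is touched, which is why the unit tests can construct the model with a nil client. A usage sketch under those assumptions:

    m := newPoolerModel(nil, postgresCluster, false, false) // pooler disabled; the nil client is never used
    if h, err := m.Sync(ctx); err == nil {
        // h.State == pgcConstants.Ready, h.Reason == reasonAllInstancesReady, h.Message == msgPoolerDisabled
        _ = h
    }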
rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) if err != nil { - return StateInformationDto{ - State: pgcConstants.PoolerFailed, - Condition: poolerReady, - Reason: reasonPoolerReconciliationFailed, - Message: fmt.Sprintf("Failed to check RW pooler existence: %v", err), - Phase: failedClusterPhase, - }, err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to check RW pooler existence: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, err } roExists, err := poolerExists(ctx, p.client, p.cluster, readOnlyEndpoint) if err != nil { - return StateInformationDto{ - State: pgcConstants.PoolerFailed, - Condition: poolerReady, - Reason: reasonPoolerReconciliationFailed, - Message: fmt.Sprintf("Failed to check RO pooler existence: %v", err), - Phase: failedClusterPhase, - }, err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to check RO pooler existence: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, err } if !rwExists || !roExists { - return StateInformationDto{ - State: pgcConstants.PoolerProvisioning, - Condition: poolerReady, - Reason: reasonPoolerCreating, - Message: msgPoolersProvisioning, - Phase: provisioningClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonPoolerCreating + p.health.Message = msgPoolersProvisioning + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } rwPooler := &cnpgv1.Pooler{} @@ -719,90 +738,84 @@ func (p *poolerHealthCheck) Condition(ctx context.Context) (StateInformationDto, Name: poolerResourceName(p.cluster.Name, readWriteEndpoint), Namespace: p.cluster.Namespace, }, rwPooler); err != nil { - return StateInformationDto{ - State: pgcConstants.PoolerPending, - Condition: poolerReady, - Reason: reasonPoolerCreating, - Message: msgWaitRWPoolerObject, - Phase: pendingClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgWaitRWPoolerObject + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } roPooler := &cnpgv1.Pooler{} if err := p.client.Get(ctx, types.NamespacedName{ Name: poolerResourceName(p.cluster.Name, readOnlyEndpoint), Namespace: p.cluster.Namespace, }, roPooler); err != nil { - return StateInformationDto{ - State: pgcConstants.PoolerPending, - Condition: poolerReady, - Reason: reasonPoolerCreating, - Message: msgWaitROPoolerObject, - Phase: pendingClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgWaitROPoolerObject + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } if !arePoolersReady(rwPooler, roPooler) { - return StateInformationDto{ - State: pgcConstants.PoolerPending, - Condition: poolerReady, - Reason: reasonPoolerCreating, - Message: msgPoolersNotReady, - Phase: pendingClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil - } - - return StateInformationDto{ - State: pgcConstants.PoolerReady, - 
Condition: poolerReady, - Reason: reasonAllInstancesReady, - Message: msgPoolersReady, - Phase: readyClusterPhase, - }, nil + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgPoolersNotReady + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + p.health.State = pgcConstants.Ready + p.health.Reason = reasonAllInstancesReady + p.health.Message = msgPoolersReady + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + return p.health, nil } -type configMapHealthCheck struct { +type configMapModel struct { client client.Client cluster *enterprisev4.PostgresCluster + + health componentHealth } -func newConfigMapHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *configMapHealthCheck { - return &configMapHealthCheck{client: c, cluster: cluster} +func newConfigMapModel(c client.Client, cluster *enterprisev4.PostgresCluster) *configMapModel { + return &configMapModel{client: c, cluster: cluster} } -func (c *configMapHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { +func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } + +func (c *configMapModel) Sync(ctx context.Context) (componentHealth, error) { + c.health = componentHealth{Condition: clusterReady} + if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { - return StateInformationDto{ - State: pgcConstants.ConfigMapProvisioning, - Condition: clusterReady, - Reason: reasonConfigMapFailed, - Message: msgConfigMapRefNotPublished, - Phase: provisioningClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonConfigMapFailed + c.health.Message = msgConfigMapRefNotPublished + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil } cm := &corev1.ConfigMap{} key := types.NamespacedName{Name: c.cluster.Status.Resources.ConfigMapRef.Name, Namespace: c.cluster.Namespace} if err := c.client.Get(ctx, key, cm); err != nil { if apierrors.IsNotFound(err) { - return StateInformationDto{ - State: pgcConstants.ConfigMapProvisioning, - Condition: clusterReady, - Reason: reasonConfigMapFailed, - Message: msgConfigMapNotFoundYet, - Phase: provisioningClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonConfigMapFailed + c.health.Message = msgConfigMapNotFoundYet + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil } - return StateInformationDto{ - State: pgcConstants.ConfigMapFailed, - Condition: clusterReady, - Reason: reasonConfigMapFailed, - Message: fmt.Sprintf("Failed to fetch ConfigMap: %v", err), - Phase: failedClusterPhase, - }, err + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf("Failed to fetch ConfigMap: %v", err) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + return c.health, err } requiredKeys := []string{ @@ -813,66 +826,65 @@ func (c *configMapHealthCheck) Condition(ctx context.Context) (StateInformationD } for _, requiredKey := range requiredKeys { if _, ok := cm.Data[requiredKey]; !ok { - return StateInformationDto{ - State: pgcConstants.ConfigMapFailed, - Condition: clusterReady, - Reason: 
reasonConfigMapFailed, - Message: fmt.Sprintf(msgFmtConfigMapMissingRequiredKey, requiredKey), - Phase: failedClusterPhase, - }, fmt.Errorf("configmap missing key %s", requiredKey) + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf(msgFmtConfigMapMissingRequiredKey, requiredKey) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + return c.health, fmt.Errorf("configmap missing key %s", requiredKey) } } - return StateInformationDto{ - State: pgcConstants.ConfigMapReady, - Condition: clusterReady, - Reason: reasonClusterBuildSucceeded, - Message: msgAccessConfigMapReady, - Phase: readyClusterPhase, - }, nil + c.health.State = pgcConstants.Ready + c.health.Reason = reasonClusterBuildSucceeded + c.health.Message = msgAccessConfigMapReady + c.health.Phase = readyClusterPhase + c.health.Result = ctrl.Result{} + return c.health, nil } -type secretHealthCheck struct { +type secretModel struct { client client.Client cluster *enterprisev4.PostgresCluster + + health componentHealth } -func newSecretHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *secretHealthCheck { - return &secretHealthCheck{client: c, cluster: cluster} +func newSecretModel(c client.Client, cluster *enterprisev4.PostgresCluster) *secretModel { + return &secretModel{client: c, cluster: cluster} } -func (s *secretHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { +func (s *secretModel) Name() string { return pgcConstants.ComponentSecret } + +func (s *secretModel) Sync(ctx context.Context) (componentHealth, error) { + s.health = componentHealth{Condition: clusterReady} + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { - return StateInformationDto{ - State: pgcConstants.SecretProvisioning, - Condition: clusterReady, - Reason: reasonUserSecretFailed, - Message: msgSecretRefNotPublished, - Phase: provisioningClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil + s.health.State = pgcConstants.Provisioning + s.health.Reason = reasonUserSecretFailed + s.health.Message = msgSecretRefNotPublished + s.health.Phase = provisioningClusterPhase + s.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return s.health, nil } secret := &corev1.Secret{} key := types.NamespacedName{Name: s.cluster.Status.Resources.SuperUserSecretRef.Name, Namespace: s.cluster.Namespace} if err := s.client.Get(ctx, key, secret); err != nil { if apierrors.IsNotFound(err) { - return StateInformationDto{ - State: pgcConstants.SecretProvisioning, - Condition: clusterReady, - Reason: reasonUserSecretFailed, - Message: msgSecretNotFoundYet, - Phase: provisioningClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, nil + s.health.State = pgcConstants.Provisioning + s.health.Reason = reasonUserSecretFailed + s.health.Message = msgSecretNotFoundYet + s.health.Phase = provisioningClusterPhase + s.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return s.health, nil } - return StateInformationDto{ - State: pgcConstants.SecretFailed, - Condition: clusterReady, - Reason: reasonUserSecretFailed, - Message: fmt.Sprintf("Failed to fetch superuser secret: %v", err), - Phase: failedClusterPhase, - }, err + s.health.State = pgcConstants.Failed + s.health.Reason = reasonUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to fetch superuser secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + return s.health, err } refKey := 
s.cluster.Status.Resources.SuperUserSecretRef.Key @@ -880,38 +892,27 @@ func (s *secretHealthCheck) Condition(ctx context.Context) (StateInformationDto, refKey = secretKeyPassword } if _, ok := secret.Data[refKey]; !ok { - return StateInformationDto{ - State: pgcConstants.SecretFailed, - Condition: clusterReady, - Reason: reasonSuperUserSecretFailed, - Message: fmt.Sprintf(msgFmtSecretMissingKey, refKey), - Phase: failedClusterPhase, - }, fmt.Errorf("secret missing key %s", refKey) - } - - return StateInformationDto{ - State: pgcConstants.SecretReady, - Condition: clusterReady, - Reason: reasonClusterBuildSucceeded, - Message: msgSuperuserSecretReady, - Phase: readyClusterPhase, - }, nil + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf(msgFmtSecretMissingKey, refKey) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + return s.health, fmt.Errorf("secret missing key %s", refKey) + } + + s.health.State = pgcConstants.Ready + s.health.Reason = reasonClusterBuildSucceeded + s.health.Message = msgSuperuserSecretReady + s.health.Phase = readyClusterPhase + s.health.Result = ctrl.Result{} + return s.health, nil } func isPendingState(state pgcConstants.State) bool { switch state { - case pgcConstants.PoolerPending, - pgcConstants.PoolerProvisioning, - pgcConstants.PoolerConfiguring, - pgcConstants.ProvisionerPending, - pgcConstants.ProvisionerProvisioning, - pgcConstants.ProvisionerConfiguring, - pgcConstants.ConfigMapPending, - pgcConstants.ConfigMapProvisioning, - pgcConstants.ConfigMapConfiguring, - pgcConstants.SecretPending, - pgcConstants.SecretProvisioning, - pgcConstants.SecretConfiguring: + case pgcConstants.Pending, + pgcConstants.Provisioning, + pgcConstants.Configuring: return true default: return false diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 57ff04daa..f0ff5468c 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -1196,28 +1196,28 @@ func TestComponentStateTriggerConditions(t *testing.T) { // TODO: as soon as coupling is addressed, remove this monster of a test. 
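The table below drives readiness through the simplified per-state flags this commit introduces (Empty, Ready, Pending, Provisioning, Configuring, Failed) together with the Contains/Add/Remove helpers added to state.go later in this commit. A small sketch of the intended flag algebra, assuming those declarations compile as shown:

    s := pgcConstants.Empty
    s = s.Add(pgcConstants.Ready).Add(pgcConstants.Pending)
    ready := s.Contains(pgcConstants.Ready)   // true
    failed := s.Contains(pgcConstants.Failed) // false
    s = s.Remove(pgcConstants.Pending)        // clears Pending, Ready stays set
    _, _ = ready, failed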
combinations := []struct { - name string - componentChecks []clusterReadynessCheck - requeue []bool - expectedResult bool - message string + name string + syncers []syncer + requeue []bool + expectAll bool + message string }{ { name: "Provisioner ready, pooler pending, sync not successful", - componentChecks: []clusterReadynessCheck{ - newProvisionerHealthCheck(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), - newPoolerHealthCheck(nil, nil, true, false), + syncers: []syncer{ + newProvisionerModel(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerModel(nil, examplePgCluster.DeepCopy(), true, false), }, - requeue: []bool{false, false}, - expectedResult: false, - message: "Provisioner is ready but pooler is pending, don't fire", + requeue: []bool{false, false}, + expectAll: false, + message: "Provisioner is ready but pooler is pending, don't fire", }, { name: "Provisioner ready, pooler ready, configMap failed, sync not successful", - componentChecks: []clusterReadynessCheck{ - newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), - newPoolerHealthCheck(nil, nil, false, false), - newConfigMapHealthCheck( + syncers: []syncer{ + newProvisionerModel(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerModel(nil, examplePgCluster.DeepCopy(), false, false), + newConfigMapModel( configMapNotFoundClient{ Client: fake.NewClientBuilder(). WithScheme(scheme). @@ -1226,23 +1226,23 @@ func TestComponentStateTriggerConditions(t *testing.T) { examplePgCluster.DeepCopy(), ), }, - requeue: []bool{false, false, true}, - expectedResult: false, - message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", + requeue: []bool{false, false, true}, + expectAll: false, + message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", }, { name: "Sync successful, all components ready.", - componentChecks: []clusterReadynessCheck{ - newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), - newPoolerHealthCheck(nil, nil, false, false), - newConfigMapHealthCheck( + syncers: []syncer{ + newProvisionerModel(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerModel(nil, examplePgCluster.DeepCopy(), false, false), + newConfigMapModel( fake.NewClientBuilder(). WithScheme(scheme). WithObjects(exampleCm). Build(), examplePgCluster.DeepCopy(), ), - newSecretHealthCheck( + newSecretModel( fake.NewClientBuilder(). WithScheme(scheme). WithObjects(exampleSecret). 
@@ -1250,9 +1250,9 @@ func TestComponentStateTriggerConditions(t *testing.T) { examplePgCluster.DeepCopy(), ), }, - requeue: []bool{false, false, false, false}, - expectedResult: true, - message: "", + requeue: []bool{false, false, false, false}, + expectAll: true, + message: "", }, } @@ -1260,13 +1260,13 @@ func TestComponentStateTriggerConditions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - state := pgcConstants.EmptyState - for i, check := range tt.componentChecks { - info, _ := check.Condition(ctx) - state |= info.State + state := pgcConstants.Empty + for i, check := range tt.syncers { + info, _ := check.Sync(ctx) + state = info.State assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) } - assert.Equal(t, tt.expectedResult, state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady, + assert.Equal(t, tt.expectAll, state&pgcConstants.Ready == pgcConstants.Ready, tt.message) }) } diff --git a/pkg/postgresql/cluster/core/types/constants/components.go b/pkg/postgresql/cluster/core/types/constants/components.go new file mode 100644 index 000000000..a77b88d74 --- /dev/null +++ b/pkg/postgresql/cluster/core/types/constants/components.go @@ -0,0 +1,8 @@ +package pgcConstants + +const ( + ComponentProvisioner = "provisioner" + ComponentPooler = "pooler" + ComponentConfigMap = "configmap" + ComponentSecret = "secret" +) diff --git a/pkg/postgresql/cluster/core/types/constants/state.go b/pkg/postgresql/cluster/core/types/constants/state.go index bf19698c6..7f4da47e9 100644 --- a/pkg/postgresql/cluster/core/types/constants/state.go +++ b/pkg/postgresql/cluster/core/types/constants/state.go @@ -3,39 +3,22 @@ package pgcConstants type State uint64 const ( - EmptyState State = 0 - PoolerReady State = 1 << iota - PoolerPending - PoolerProvisioning - PoolerConfiguring - PoolerFailed - - ProvisionerReady - ProvisionerPending - ProvisionerProvisioning - ProvisionerConfiguring - ProvisionerFailed - - ConfigMapReady - ConfigMapPending - ConfigMapProvisioning - ConfigMapConfiguring - ConfigMapFailed + Empty State = 0 + Ready State = 1 << iota + Pending + Provisioning + Configuring + Failed +) - SecretReady - SecretPending - SecretProvisioning - SecretConfiguring - SecretFailed +func (s State) Contains(state State) bool { + return s&state == state +} - ClusterReady - ClusterPending - ClusterProvisioning - ClusterConfiguring - ClusterFailed -) +func (s State) Add(state State) State { + return s | state +} -const ( - ComponentsReady = PoolerReady | ProvisionerReady | SecretReady | ConfigMapReady - OwnershipReady -) +func (s State) Remove(state State) State { + return s &^ state +} From c71c27ea24283be1a3e675792cacdaaa87aa543d Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Fri, 10 Apr 2026 12:22:48 +0200 Subject: [PATCH 03/10] removed redundant sync at the end --- pkg/postgresql/cluster/core/cluster.go | 107 ++++++++++++++++--------- 1 file changed, 71 insertions(+), 36 deletions(-) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 207096b97..8bcb48667 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -203,6 +203,12 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. Key: secretKeyPassword, } } + // Secret provisioning phase checkpoint. 
+ secretComponent := newSecretModel(c, postgresCluster) + secretHealth, secretSyncErr := secretComponent.Sync(ctx) + if result, err := manageComponentHealth(ctx, secretComponent.Name(), updateStatus, secretHealth, secretSyncErr); err != nil || result != (ctrl.Result{}) { + return result, err + } // Build desired CNPG Cluster spec. desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName) @@ -283,6 +289,12 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } return ctrl.Result{}, err } + // Provisioner phase checkpoint. + provisionerComponent := newProvisionerModel(postgresCluster, cnpgCluster) + provisionerHealth, provisionerSyncErr := provisionerComponent.Sync(ctx) + if result, err := manageComponentHealth(ctx, provisionerComponent.Name(), updateStatus, provisionerHealth, provisionerSyncErr); err != nil || result != (ctrl.Result{}) { + return result, err + } // Reconcile Connection Pooler. poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled @@ -397,6 +409,15 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } + // Pooler provisioning phase checkpoint. + poolerComponent := newPoolerModel(c, postgresCluster, poolerEnabled, poolerConfigPresent) + poolerHealth, poolerSyncErr := poolerComponent.Sync(ctx) + if result, err := manageComponentHealth( + ctx, poolerComponent.Name(), + updateStatus, poolerHealth, + poolerSyncErr); err != nil || result != (ctrl.Result{}) { + return result, err + } // Reconcile ConfigMap when CNPG cluster is healthy. if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { @@ -446,52 +467,66 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. postgresCluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} } } - - // Aggregate model readiness from iterative health checks. - components := []componentModel{ - newProvisionerModel(postgresCluster, cnpgCluster), - newPoolerModel(c, postgresCluster, poolerEnabled, poolerConfigPresent), - newConfigMapModel(c, postgresCluster), - newSecretModel(c, postgresCluster), + // ConfigMap provisioning phase checkpoint. 
+ configMapComponent := newConfigMapModel(c, postgresCluster) + configMapHealth, configMapSyncErr := configMapComponent.Sync(ctx) + if result, err := manageComponentHealth( + ctx, configMapComponent.Name(), + updateStatus, configMapHealth, + configMapSyncErr); err != nil || result != (ctrl.Result{}) { + return result, err } - for _, component := range components { - health, err := component.Sync(ctx) - if err != nil { - logger.Error(err, "Component sync failed", - "component", component.Name(), - "requeueAfter", health.Result.RequeueAfter, - "condition", health.Condition, - "reason", health.Reason, - "phase", health.Phase, - "state", health.State) - if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { - if apierrors.IsConflict(statusErr) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, statusErr - } - return health.Result, err + logger.Info("Reconciliation complete") + if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil } + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} - if isPendingState(health.State) { - if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { - if apierrors.IsConflict(statusErr) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, statusErr +func manageComponentHealth( + ctx context.Context, + componentName string, + updateStatus func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error, + health componentHealth, + syncErr error, +) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + if syncErr != nil { + if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { + if apierrors.IsConflict(statusErr) { + return ctrl.Result{Requeue: true}, nil } - return health.Result, nil + return ctrl.Result{}, statusErr } + return health.Result, syncErr } - logger.Info("Reconciliation complete") - if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil + if isPendingState(health.State) { + logger.Info("Component sync pending", + "component", componentName, + "requeueAfter", health.Result.RequeueAfter, + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) + if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { + if apierrors.IsConflict(statusErr) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, statusErr } - return ctrl.Result{}, err + return health.Result, nil } + + logger.Info("Component sync ready", + "component", componentName, + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) return ctrl.Result{}, nil } From 1f17ce5a226d536c054d6df52461db42868e4fd4 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 13 Apr 2026 10:39:29 +0200 Subject: [PATCH 04/10] incremental state building with limited redundancy --- .../postgrescluster_controller_test.go | 2 +- pkg/postgresql/cluster/core/cluster.go | 663 +++++++++--------- 
.../cluster/core/cluster_unit_test.go | 72 +- 3 files changed, 378 insertions(+), 359 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 5687ae1f8..9622068b6 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -202,7 +202,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") Expect(cond).NotTo(BeNil()) Expect(cond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cond.Reason).To(Equal("ClusterBuildSucceeded")) + Expect(cond.Reason).To(Equal("CNPGClusterProvisioning")) // Simulate external CNPG controller status progression. cnpg := &cnpgv1.Cluster{} diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 8bcb48667..513982c2f 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -140,342 +140,77 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. logger.Info("Superuser secret name derived", "name", postgresSecretName) } - secretExists, secretErr := clusterSecretExists(ctx, c, postgresCluster.Namespace, postgresSecretName, secret) - if secretErr != nil { - logger.Error(secretErr, "Failed to check if PostgresCluster secret exists", "name", postgresSecretName) - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, secretErr - } - if !secretExists { - logger.Info("Superuser secret creation started", "name", postgresSecretName) - if err := ensureClusterSecret(ctx, c, rc.Scheme, postgresCluster, postgresSecretName, secret); err != nil { - logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err - } - if err := c.Status().Update(ctx, postgresCluster); err != nil { - if apierrors.IsConflict(err) { - logger.Info("Conflict after secret creation, will requeue") - return ctrl.Result{Requeue: true}, nil - } - logger.Error(err, "Failed to update status after secret creation") - return ctrl.Result{}, err - } - rc.emitNormal(postgresCluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", postgresSecretName)) - logger.Info("Superuser secret ref persisted to status") - } + poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil - // Re-attach ownerRef if it was stripped (e.g. by a Retain-policy deletion of a previous cluster). 
- hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), postgresCluster, rc.Scheme) - if ownerRefErr != nil { - logger.Error(ownerRefErr, "Failed to check owner reference on Secret") - return ctrl.Result{}, fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + components := []componentModel{ + newSecretModel(c, rc.Scheme, rc, postgresCluster, postgresSecretName), + newProvisionerModel(c, rc.Scheme, rc, postgresCluster, mergedConfig, postgresSecretName), } - if secretExists && !hasOwnerRef { - logger.Info("Existing secret linked to PostgresCluster", "name", postgresSecretName) - rc.emitNormal(postgresCluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", postgresSecretName)) - originalSecret := secret.DeepCopy() - if err := ctrl.SetControllerReference(postgresCluster, secret, rc.Scheme); err != nil { - return ctrl.Result{}, fmt.Errorf("failed to set controller reference on existing secret: %w", err) + + phase := func(component componentModel) (ctrl.Result, error) { + gate, gateErr := component.EvaluatePrerequisites(ctx) + if gateErr != nil { + return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", component.Name(), gateErr) } - if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { - logger.Error(err, "Failed to patch existing secret with controller reference") - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, - fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if !gate.Allowed { + result, healthErr := manageComponentHealth(ctx, component.Name(), updateStatus, gate.Health, nil) + if healthErr != nil { + return result, healthErr + } + if result != (ctrl.Result{}) { + return result, nil } - return ctrl.Result{}, err + return gate.Health.Result, nil } - } - if postgresCluster.Status.Resources.SuperUserSecretRef == nil { - postgresCluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{Name: postgresSecretName}, - Key: secretKeyPassword, + if err := component.Actuate(ctx); err != nil { + return ctrl.Result{}, fmt.Errorf("%s actuate: %w", component.Name(), err) } - } - // Secret provisioning phase checkpoint. - secretComponent := newSecretModel(c, postgresCluster) - secretHealth, secretSyncErr := secretComponent.Sync(ctx) - if result, err := manageComponentHealth(ctx, secretComponent.Name(), updateStatus, secretHealth, secretSyncErr); err != nil || result != (ctrl.Result{}) { - return result, err - } - - // Build desired CNPG Cluster spec. - desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName) - // Fetch existing CNPG Cluster or create it. 
- existingCNPG := &cnpgv1.Cluster{} - err = c.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) - switch { - case apierrors.IsNotFound(err): - logger.Info("CNPG Cluster creation started", "name", postgresCluster.Name) - newCluster := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName) - if err := c.Create(ctx, newCluster); err != nil { - logger.Error(err, "Failed to create CNPG Cluster") - rc.emitWarning(postgresCluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildFailed, - fmt.Sprintf("Failed to create CNPG Cluster: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + health, err := component.Converge(ctx) + result, healthErr := manageComponentHealth(ctx, component.Name(), updateStatus, health, err) + if healthErr != nil { + return result, healthErr } - rc.emitNormal(postgresCluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, - "CNPG Cluster created", pendingClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if err != nil { + return health.Result, fmt.Errorf("%s converge: %w", component.Name(), err) } - logger.Info("CNPG Cluster created, requeueing for status update", "name", postgresCluster.Name) - return ctrl.Result{RequeueAfter: retryDelay}, nil - case err != nil: - logger.Error(err, "Failed to get CNPG Cluster") - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterGetFailed, - fmt.Sprintf("Failed to get CNPG Cluster: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if isPendingState(health.State) { + return health.Result, nil } - return ctrl.Result{}, err + return result, nil } - // Patch CNPG Cluster spec if drift detected. 
- cnpgCluster = existingCNPG - currentNormalized := normalizeCNPGClusterSpec(cnpgCluster.Spec, mergedConfig.Spec.PostgreSQLConfig) - desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.Spec.PostgreSQLConfig) - - if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { - logger.Info("CNPG Cluster spec drift detected, patch started", "name", cnpgCluster.Name) - originalCluster := cnpgCluster.DeepCopy() - cnpgCluster.Spec = desiredSpec - - switch patchErr := patchObject(ctx, c, originalCluster, cnpgCluster, "CNPGCluster"); { - case apierrors.IsConflict(patchErr): - logger.Info("Conflict occurred while updating CNPG Cluster, requeueing", "name", cnpgCluster.Name) - return ctrl.Result{Requeue: true}, nil - case patchErr != nil: - logger.Error(patchErr, "Failed to patch CNPG Cluster", "name", cnpgCluster.Name) - rc.emitWarning(postgresCluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterPatchFailed, - fmt.Sprintf("Failed to patch CNPG Cluster: %v", patchErr), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, patchErr - default: - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, - "CNPG Cluster spec updated, waiting for healthy state", provisioningClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status after patch") - return ctrl.Result{Requeue: true}, nil - } - rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") - logger.Info("CNPG Cluster patched, requeueing for status update", "name", cnpgCluster.Name) - return ctrl.Result{RequeueAfter: retryDelay}, nil + for _, component := range components { + result, err := phase(component) + if err != nil { + return result, err } - } - - // Reconcile ManagedRoles. - if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to reconcile managed roles") - rc.emitWarning(postgresCluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, - fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result != (ctrl.Result{}) { + return result, nil } - return ctrl.Result{}, err - } - // Provisioner phase checkpoint. - provisionerComponent := newProvisionerModel(postgresCluster, cnpgCluster) - provisionerHealth, provisionerSyncErr := provisionerComponent.Sync(ctx) - if result, err := manageComponentHealth(ctx, provisionerComponent.Name(), updateStatus, provisionerHealth, provisionerSyncErr); err != nil || result != (ctrl.Result{}) { - return result, err } - // Reconcile Connection Pooler. 
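The replacement flow above splits the old one-shot Sync into three steps per component: EvaluatePrerequisites (a read-only gate), Actuate (the writes), and Converge (readiness classification). The sketch below shows that control flow in isolation; the interface and decision types are simplified stand-ins for the dependencyGated/actuator/converger declarations added later in this file, componentHealth is reduced to a ready/requeue pair, and condition publishing is left out.

package sketch

import (
	"context"
	"fmt"

	ctrl "sigs.k8s.io/controller-runtime"
)

type gateDecision struct {
	Allowed bool
	Requeue ctrl.Result // how long to wait when the gate is closed
}

type phasedComponent interface {
	Name() string
	// EvaluatePrerequisites only inspects state; it never mutates the cluster.
	EvaluatePrerequisites(ctx context.Context) (gateDecision, error)
	// Actuate performs the writes (create/patch/delete) needed to reach the desired state.
	Actuate(ctx context.Context) error
	// Converge reads the resulting state and reports whether the component is ready.
	Converge(ctx context.Context) (ready bool, requeue ctrl.Result, err error)
}

// runPhase drives one component through gate -> actuate -> converge and tells
// the caller whether reconciliation should stop here.
func runPhase(ctx context.Context, c phasedComponent) (ctrl.Result, error) {
	gate, err := c.EvaluatePrerequisites(ctx)
	if err != nil {
		return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", c.Name(), err)
	}
	if !gate.Allowed {
		return gate.Requeue, nil // prerequisites missing: requeue without actuating
	}
	if err := c.Actuate(ctx); err != nil {
		return ctrl.Result{}, fmt.Errorf("%s actuate: %w", c.Name(), err)
	}
	ready, requeue, err := c.Converge(ctx)
	if err != nil {
		return requeue, fmt.Errorf("%s converge: %w", c.Name(), err)
	}
	if !ready {
		return requeue, nil // still provisioning: stop and let the manager requeue
	}
	return ctrl.Result{}, nil // ready: the caller moves on to the next component
}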
- poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled - poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil - - rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) - if err != nil { - logger.Error(err, "Failed to check RW pooler existence") - errs := []error{err} - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - errs = append(errs, statusErr) - } - return ctrl.Result{}, errors.Join(errs...) - } - roPoolerExists, err := poolerExists(ctx, c, postgresCluster, readOnlyEndpoint) - if err != nil { - logger.Error(err, "Failed to check RO pooler existence") - errs := []error{err} - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - errs = append(errs, statusErr) - } - return ctrl.Result{}, errors.Join(errs...) + provisionerComponent := components[1].(*provisionerModel) + cnpgCluster = provisionerComponent.cnpgCluster + runtimeView := provisionerRuntimeView{model: provisionerComponent} + oldConditions := append([]metav1.Condition(nil), postgresCluster.Status.Conditions...) + runtimeComponents := []componentModel{ + newPoolerModel(c, rc.Scheme, rc, postgresCluster, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), + newConfigMapModel(c, rc.Scheme, rc, runtimeView, postgresCluster, postgresSecretName), } - switch { - case !poolerEnabled: - if err := deleteConnectionPoolers(ctx, c, postgresCluster); err != nil { - logger.Error(err, "Failed to delete connection poolers") - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to delete connection poolers: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err - } - postgresCluster.Status.ConnectionPoolerStatus = nil - meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, string(poolerReady)) - - case !rwPoolerExists || !roPoolerExists: - if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil { - logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping", - "class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing, - fmt.Sprintf("Connection pooler is enabled but no config found in class %q or cluster %q", - postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, nil - } - if cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { - logger.Info("CNPG Cluster not healthy yet, pending pooler creation", "clusterPhase", cnpgCluster.Status.Phase) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonCNPGClusterNotHealthy, - "Waiting for CNPG cluster to become healthy before creating poolers", pendingClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{RequeueAfter: retryDelay}, nil - } - if err := 
createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster); err != nil { - logger.Error(err, "Failed to reconcile connection pooler") - rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err - } - rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") - logger.Info("Connection pooler creation started, requeueing") - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, - "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{RequeueAfter: retryDelay}, nil - - case func() bool { - rwPooler := &cnpgv1.Pooler{} - rwErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readWriteEndpoint), - Namespace: postgresCluster.Namespace, - }, rwPooler) - roPooler := &cnpgv1.Pooler{} - roErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readOnlyEndpoint), - Namespace: postgresCluster.Namespace, - }, roPooler) - return rwErr != nil || roErr != nil || !arePoolersReady(rwPooler, roPooler) - }(): - logger.Info("Connection Poolers are not ready yet, requeueing") - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, - "Connection poolers are being provisioned", pendingClusterPhase); statusErr != nil { - if apierrors.IsConflict(statusErr) { - logger.Info("Conflict updating pooler status, will requeue") - return ctrl.Result{Requeue: true}, nil - } - } - return ctrl.Result{RequeueAfter: retryDelay}, nil - - default: - oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) - copy(oldConditions, postgresCluster.Status.Conditions) - if err := syncPoolerStatus(ctx, c, postgresCluster); err != nil { - logger.Error(err, "Failed to sync pooler status") - rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to sync pooler status: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err - } - rc.emitPoolerReadyTransition(postgresCluster, oldConditions) - } - // Pooler provisioning phase checkpoint. - poolerComponent := newPoolerModel(c, postgresCluster, poolerEnabled, poolerConfigPresent) - poolerHealth, poolerSyncErr := poolerComponent.Sync(ctx) - if result, err := manageComponentHealth( - ctx, poolerComponent.Name(), - updateStatus, poolerHealth, - poolerSyncErr); err != nil || result != (ctrl.Result{}) { - return result, err - } - - // Reconcile ConfigMap when CNPG cluster is healthy. 
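The pooler logic removed here is redistributed rather than dropped: mutations (deleting poolers when the feature is off, creating or updating them once the CNPG cluster is healthy) move into the pooler model's Actuate, while Converge only observes and classifies state. A minimal sketch of that split, using hypothetical function fields (deletePoolers, createPoolers, poolersReady) in place of the real deleteConnectionPoolers, createOrUpdateConnectionPoolers and pooler readiness helpers:

package sketch

import "context"

type poolerSketch struct {
	enabled        bool
	clusterHealthy func(ctx context.Context) bool
	deletePoolers  func(ctx context.Context) error
	createPoolers  func(ctx context.Context) error
	poolersReady   func(ctx context.Context) (bool, error)
}

// Actuate performs all writes: remove poolers when the feature is off,
// otherwise create or update them once the backing cluster is healthy.
func (p *poolerSketch) Actuate(ctx context.Context) error {
	if !p.enabled {
		return p.deletePoolers(ctx)
	}
	if !p.clusterHealthy(ctx) {
		return nil // nothing to do yet; Converge will report "provisioning"
	}
	return p.createPoolers(ctx)
}

// Converge never mutates; it only classifies the current pooler state.
func (p *poolerSketch) Converge(ctx context.Context) (ready bool, err error) {
	if !p.enabled {
		return true, nil // disabled counts as ready so the cluster can still report healthy
	}
	return p.poolersReady(ctx)
}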
- if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { - logger.Info("CNPG Cluster healthy, reconciling ConfigMap") - desiredCM, err := generateConfigMap(ctx, c, rc.Scheme, postgresCluster, cnpgCluster, postgresSecretName) - if err != nil { - logger.Error(err, "Failed to generate ConfigMap") - rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to generate ConfigMap: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err - } - cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} - createOrUpdateResult, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { - cm.Data = desiredCM.Data - cm.Annotations = desiredCM.Annotations - cm.Labels = desiredCM.Labels - if !metav1.IsControlledBy(cm, postgresCluster) { - if err := ctrl.SetControllerReference(postgresCluster, cm, rc.Scheme); err != nil { - return fmt.Errorf("setting controller reference: %w", err) - } - } - return nil - }) + for _, component := range runtimeComponents { + result, err := phase(component) if err != nil { - logger.Error(err, "Failed to reconcile ConfigMap", "name", desiredCM.Name) - rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to reconcile ConfigMap: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + return result, err } - switch createOrUpdateResult { - case controllerutil.OperationResultCreated: - rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s created", desiredCM.Name)) - logger.Info("ConfigMap created", "name", desiredCM.Name) - case controllerutil.OperationResultUpdated: - rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name)) - logger.Info("ConfigMap updated", "name", desiredCM.Name) - default: - logger.Info("ConfigMap unchanged", "name", desiredCM.Name) + if result != (ctrl.Result{}) { + return result, nil } - if postgresCluster.Status.Resources.ConfigMapRef == nil { - postgresCluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} - } - } - // ConfigMap provisioning phase checkpoint. 
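The runtimeComponents wiring above hands the ConfigMap model a read-only view of the CNPG cluster the provisioner already holds, instead of a second API read; the clusterRuntimeView and provisionerRuntimeView declarations later in this patch implement it. A minimal sketch of the same indirection with stand-in types:

package sketch

type cnpgCluster struct {
	Name    string
	Healthy bool
}

type runtimeView interface {
	Cluster() *cnpgCluster
	IsHealthy() bool
}

// provisionerView adapts the provisioner's cached object to the view interface.
type provisionerView struct {
	cached *cnpgCluster // populated by the provisioner during Actuate
}

func (v provisionerView) Cluster() *cnpgCluster { return v.cached }
func (v provisionerView) IsHealthy() bool       { return v.cached != nil && v.cached.Healthy }

// configMapSketch depends only on the view, so it can be unit-tested with a
// hand-built view and never needs a Kubernetes client to learn cluster health.
type configMapSketch struct {
	runtime runtimeView
}

func (c configMapSketch) prerequisitesMet() bool {
	return c.runtime != nil && c.runtime.IsHealthy()
}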
- configMapComponent := newConfigMapModel(c, postgresCluster) - configMapHealth, configMapSyncErr := configMapComponent.Sync(ctx) - if result, err := manageComponentHealth( - ctx, configMapComponent.Name(), - updateStatus, configMapHealth, - configMapSyncErr); err != nil || result != (ctrl.Result{}) { - return result, err } + rc.emitPoolerReadyTransition(postgresCluster, oldConditions) logger.Info("Reconciliation complete") if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { @@ -541,35 +276,128 @@ type componentHealth struct { } type componentModel interface { - syncer - namer + phase } -type syncer interface { - Sync(ctx context.Context) (componentHealth, error) +type actuator interface { + Actuate(ctx context.Context) error +} + +type converger interface { + Converge(ctx context.Context) (componentHealth, error) +} + +type dependencyGated interface { + EvaluatePrerequisites(ctx context.Context) (prerequisiteDecision, error) +} + +type phase interface { + actuator + converger + dependencyGated + namer } type namer interface { Name() string } +type prerequisiteDecision struct { + Allowed bool + Health componentHealth +} + +type eventEmitter interface { + emitNormal(obj client.Object, reason, message string) + emitWarning(obj client.Object, reason, message string) +} + +type clusterRuntimeView interface { + Cluster() *cnpgv1.Cluster + IsHealthy() bool +} + +type provisionerRuntimeView struct { + model *provisionerModel +} + +func (v provisionerRuntimeView) Cluster() *cnpgv1.Cluster { + return v.model.cnpgCluster +} + +func (v provisionerRuntimeView) IsHealthy() bool { + return v.model.cnpgCluster != nil && v.model.cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy +} + type provisionerModel struct { - cluster *enterprisev4.PostgresCluster - cnpgCluster *cnpgv1.Cluster + client client.Client + scheme *runtime.Scheme + events eventEmitter + cluster *enterprisev4.PostgresCluster + mergedConfig *MergedConfig + secretName string + cnpgCluster *cnpgv1.Cluster health componentHealth } -// TODO: It's a model -// -> that can be addresed when a place for this obj is created in -// proj structure (future) -func newProvisionerModel(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) *provisionerModel { - return &provisionerModel{cluster: cluster, cnpgCluster: cnpgCluster} +func newProvisionerModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, secretName string) *provisionerModel { + return &provisionerModel{client: c, scheme: scheme, events: events, cluster: cluster, mergedConfig: mergedConfig, secretName: secretName} } func (p *provisionerModel) Name() string { return pgcConstants.ComponentProvisioner } -func (p *provisionerModel) Sync(_ context.Context) (componentHealth, error) { +func (p *provisionerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if p.cluster.Status.Resources == nil || p.cluster.Status.Resources.SuperUserSecretRef == nil { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Pending, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: msgSecretRefNotPublished, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (p *provisionerModel) Actuate(ctx context.Context) error { + desiredSpec := 
buildCNPGClusterSpec(p.mergedConfig, p.secretName) + existingCNPG := &cnpgv1.Cluster{} + err := p.client.Get(ctx, types.NamespacedName{Name: p.cluster.Name, Namespace: p.cluster.Namespace}, existingCNPG) + switch { + case apierrors.IsNotFound(err): + newCluster := buildCNPGCluster(p.scheme, p.cluster, p.mergedConfig, p.secretName) + if createErr := p.client.Create(ctx, newCluster); createErr != nil { + p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", createErr)) + return createErr + } + p.events.emitNormal(p.cluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") + p.cnpgCluster = newCluster + case err != nil: + return err + default: + p.cnpgCluster = existingCNPG + currentNormalized := normalizeCNPGClusterSpec(p.cnpgCluster.Spec, p.mergedConfig.Spec.PostgreSQLConfig) + desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, p.mergedConfig.Spec.PostgreSQLConfig) + if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { + originalCluster := p.cnpgCluster.DeepCopy() + p.cnpgCluster.Spec = desiredSpec + if patchErr := patchObject(ctx, p.client, originalCluster, p.cnpgCluster, "CNPGCluster"); patchErr != nil { + p.events.emitWarning(p.cluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) + return patchErr + } + p.events.emitNormal(p.cluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") + } + if rolesErr := reconcileManagedRoles(ctx, p.client, p.cluster, p.cnpgCluster); rolesErr != nil { + p.events.emitWarning(p.cluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr)) + return rolesErr + } + } + if p.cnpgCluster != nil { p.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ APIVersion: "postgresql.cnpg.io/v1", @@ -579,7 +407,10 @@ func (p *provisionerModel) Sync(_ context.Context) (componentHealth, error) { UID: p.cnpgCluster.UID, } } + return nil +} +func (p *provisionerModel) Converge(_ context.Context) (componentHealth, error) { p.health.Condition = clusterReady if p.cnpgCluster == nil { @@ -702,17 +533,25 @@ func (p *provisionerModel) Sync(_ context.Context) (componentHealth, error) { type poolerModel struct { client client.Client + scheme *runtime.Scheme + events eventEmitter cluster *enterprisev4.PostgresCluster + mergedConfig *MergedConfig + cnpgCluster *cnpgv1.Cluster poolerEnabled bool poolerConfigPresent bool health componentHealth } -func newPoolerModel(c client.Client, cluster *enterprisev4.PostgresCluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { +func newPoolerModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { return &poolerModel{ client: c, + scheme: scheme, + events: events, cluster: cluster, + mergedConfig: mergedConfig, + cnpgCluster: cnpgCluster, poolerEnabled: poolerEnabled, poolerConfigPresent: poolerConfigPresent, } @@ -720,7 +559,63 @@ func newPoolerModel(c client.Client, cluster *enterprisev4.PostgresCluster, pool func (p *poolerModel) Name() string { return pgcConstants.ComponentPooler } -func (p *poolerModel) Sync(ctx context.Context) (componentHealth, error) { +func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if !p.poolerEnabled || !p.poolerConfigPresent { + return prerequisiteDecision{Allowed: true}, 
nil + } + if p.cnpgCluster == nil { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Pending, + Condition: poolerReady, + Reason: reasonCNPGProvisioning, + Message: msgCNPGPendingCreation, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Provisioning, + Condition: poolerReady, + Reason: reasonCNPGProvisioning, + Message: fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase), + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (p *poolerModel) Actuate(ctx context.Context) error { + switch { + case !p.poolerEnabled: + if err := deleteConnectionPoolers(ctx, p.client, p.cluster); err != nil { + return err + } + p.cluster.Status.ConnectionPoolerStatus = nil + meta.RemoveStatusCondition(&p.cluster.Status.Conditions, string(poolerReady)) + return nil + case !p.poolerConfigPresent: + return nil + case p.cnpgCluster == nil || p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy: + return nil + default: + if err := createOrUpdateConnectionPoolers(ctx, p.client, p.scheme, p.cluster, p.mergedConfig, p.cnpgCluster); err != nil { + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) + return err + } + p.events.emitNormal(p.cluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") + return nil + } +} + +func (p *poolerModel) Converge(ctx context.Context) (componentHealth, error) { p.health = componentHealth{Condition: poolerReady} if !p.poolerEnabled { @@ -811,18 +706,75 @@ func (p *poolerModel) Sync(ctx context.Context) (componentHealth, error) { type configMapModel struct { client client.Client + scheme *runtime.Scheme + events eventEmitter + runtime clusterRuntimeView cluster *enterprisev4.PostgresCluster + secret string health componentHealth } -func newConfigMapModel(c client.Client, cluster *enterprisev4.PostgresCluster) *configMapModel { - return &configMapModel{client: c, cluster: cluster} +func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *configMapModel { + return &configMapModel{client: c, scheme: scheme, events: events, runtime: runtime, cluster: cluster, secret: secret} } func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } -func (c *configMapModel) Sync(ctx context.Context) (componentHealth, error) { +func (c *configMapModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if c.runtime == nil || !c.runtime.IsHealthy() { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Provisioning, + Condition: clusterReady, + Reason: reasonCNPGProvisioning, + Message: msgCNPGPendingCreation, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (c *configMapModel) Actuate(ctx context.Context) error { + cnpgCluster := c.runtime.Cluster() + if cnpgCluster == nil { + return nil + } + desiredCM, err := generateConfigMap(ctx, c.client, c.scheme, c.cluster, cnpgCluster, c.secret) + if err != nil { + return err + } + cm := 
&corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} + op, err := controllerutil.CreateOrUpdate(ctx, c.client, cm, func() error { + cm.Data = desiredCM.Data + cm.Annotations = desiredCM.Annotations + cm.Labels = desiredCM.Labels + if !metav1.IsControlledBy(cm, c.cluster) { + if setErr := ctrl.SetControllerReference(c.cluster, cm, c.scheme); setErr != nil { + return fmt.Errorf("setting controller reference: %w", setErr) + } + } + return nil + }) + if err != nil { + c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) + return err + } + if op == controllerutil.OperationResultCreated { + c.events.emitNormal(c.cluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s created", desiredCM.Name)) + } else if op == controllerutil.OperationResultUpdated { + c.events.emitNormal(c.cluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name)) + } + if c.cluster.Status.Resources.ConfigMapRef == nil { + c.cluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} + } + return nil +} + +func (c *configMapModel) Converge(ctx context.Context) (componentHealth, error) { c.health = componentHealth{Condition: clusterReady} if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { @@ -880,18 +832,63 @@ func (c *configMapModel) Sync(ctx context.Context) (componentHealth, error) { type secretModel struct { client client.Client + scheme *runtime.Scheme + events eventEmitter cluster *enterprisev4.PostgresCluster + name string health componentHealth } -func newSecretModel(c client.Client, cluster *enterprisev4.PostgresCluster) *secretModel { - return &secretModel{client: c, cluster: cluster} +func newSecretModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, cluster *enterprisev4.PostgresCluster, name string) *secretModel { + return &secretModel{client: c, scheme: scheme, events: events, cluster: cluster, name: name} } func (s *secretModel) Name() string { return pgcConstants.ComponentSecret } -func (s *secretModel) Sync(ctx context.Context) (componentHealth, error) { +func (s *secretModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + return prerequisiteDecision{Allowed: true}, nil +} + +func (s *secretModel) Actuate(ctx context.Context) error { + secret := &corev1.Secret{} + secretExists, secretErr := clusterSecretExists(ctx, s.client, s.cluster.Namespace, s.name, secret) + if secretErr != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) + return secretErr + } + if !secretExists { + if err := ensureClusterSecret(ctx, s.client, s.scheme, s.cluster, s.name, secret); err != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) + return err + } + s.events.emitNormal(s.cluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", s.name)) + } + hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), s.cluster, s.scheme) + if ownerRefErr != nil { + return fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + } + if secretExists && !hasOwnerRef { + originalSecret := secret.DeepCopy() + if err := ctrl.SetControllerReference(s.cluster, secret, s.scheme); err != nil { + return fmt.Errorf("failed to set controller reference on existing secret: %w", err) + } + if err := 
patchObject(ctx, s.client, originalSecret, secret, "Secret"); err != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) + return err + } + s.events.emitNormal(s.cluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", s.name)) + } + if s.cluster.Status.Resources.SuperUserSecretRef == nil { + s.cluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: s.name}, + Key: secretKeyPassword, + } + } + return nil +} + +func (s *secretModel) Converge(ctx context.Context) (componentHealth, error) { s.health = componentHealth{Condition: clusterReady} if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index f0ff5468c..a90e3b19b 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -24,6 +24,11 @@ type configMapNotFoundClient struct { client.Client } +type noopEventEmitter struct{} + +func (noopEventEmitter) emitNormal(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitWarning(_ client.Object, _, _ string) {} + func (c configMapNotFoundClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { if _, ok := obj.(*corev1.ConfigMap); ok { return apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, key.Name) @@ -1196,60 +1201,77 @@ func TestComponentStateTriggerConditions(t *testing.T) { // TODO: as soon as coupling is addressed, remove this monster of a test. combinations := []struct { - name string - syncers []syncer - requeue []bool - expectAll bool - message string + name string + components []componentModel + requeue []bool + expectAll bool + message string }{ { name: "Provisioner ready, pooler pending, sync not successful", - syncers: []syncer{ - newProvisionerModel(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), - newPoolerModel(nil, examplePgCluster.DeepCopy(), true, false), - }, + components: func() []componentModel { + provisioner := newProvisionerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, "pg1-secret") + provisioner.cnpgCluster = &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}} + pooler := newPoolerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, nil, true, false) + return []componentModel{provisioner, pooler} + }(), requeue: []bool{false, false}, expectAll: false, message: "Provisioner is ready but pooler is pending, don't fire", }, { name: "Provisioner ready, pooler ready, configMap failed, sync not successful", - syncers: []syncer{ - newProvisionerModel(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), - newPoolerModel(nil, examplePgCluster.DeepCopy(), false, false), - newConfigMapModel( + components: func() []componentModel { + provisioner := newProvisionerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, "pg1-secret") + provisioner.cnpgCluster = &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}} + pooler := newPoolerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, nil, false, false) + configMap := newConfigMapModel( configMapNotFoundClient{ Client: fake.NewClientBuilder(). 
WithScheme(scheme). Build(), }, + nil, + noopEventEmitter{}, + nil, examplePgCluster.DeepCopy(), - ), - }, + "pg1-secret", + ) + return []componentModel{provisioner, pooler, configMap} + }(), requeue: []bool{false, false, true}, expectAll: false, message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", }, { name: "Sync successful, all components ready.", - syncers: []syncer{ - newProvisionerModel(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), - newPoolerModel(nil, examplePgCluster.DeepCopy(), false, false), - newConfigMapModel( + components: func() []componentModel { + provisioner := newProvisionerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, "pg1-secret") + provisioner.cnpgCluster = &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}} + pooler := newPoolerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, nil, false, false) + configMap := newConfigMapModel( fake.NewClientBuilder(). WithScheme(scheme). WithObjects(exampleCm). Build(), + nil, + noopEventEmitter{}, + nil, examplePgCluster.DeepCopy(), - ), - newSecretModel( + "pg1-secret", + ) + secret := newSecretModel( fake.NewClientBuilder(). WithScheme(scheme). WithObjects(exampleSecret). Build(), + nil, + noopEventEmitter{}, examplePgCluster.DeepCopy(), - ), - }, + "pg1-secret", + ) + return []componentModel{provisioner, pooler, configMap, secret} + }(), requeue: []bool{false, false, false, false}, expectAll: true, message: "", @@ -1261,8 +1283,8 @@ func TestComponentStateTriggerConditions(t *testing.T) { t.Parallel() state := pgcConstants.Empty - for i, check := range tt.syncers { - info, _ := check.Sync(ctx) + for i, check := range tt.components { + info, _ := check.Converge(ctx) state = info.State assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) } From abdc1ab171c4fe2f9df6032101a810d16e5c77c9 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 13 Apr 2026 10:47:09 +0200 Subject: [PATCH 05/10] logging align --- pkg/postgresql/cluster/core/cluster.go | 57 +++++++++++++++++--------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 513982c2f..6a9775f62 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -149,13 +149,22 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
} phase := func(component componentModel) (ctrl.Result, error) { + componentLogger := logger.WithValues("component", component.Name()) gate, gateErr := component.EvaluatePrerequisites(ctx) if gateErr != nil { + componentLogger.Error(gateErr, "Component prerequisite evaluation failed", "step", "prerequisites") return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", component.Name(), gateErr) } if !gate.Allowed { - result, healthErr := manageComponentHealth(ctx, component.Name(), updateStatus, gate.Health, nil) + componentLogger.Info("Component blocked by prerequisites", + "step", "prerequisites", + "condition", gate.Health.Condition, + "reason", gate.Health.Reason, + "phase", gate.Health.Phase, + "requeueAfter", gate.Health.Result.RequeueAfter) + result, healthErr := manageComponentHealth(ctx, updateStatus, gate.Health, nil) if healthErr != nil { + componentLogger.Error(healthErr, "Failed updating status from prerequisite gate", "step", "status") return result, healthErr } if result != (ctrl.Result{}) { @@ -165,20 +174,46 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } if err := component.Actuate(ctx); err != nil { + componentLogger.Error(err, "Component actuation failed", "step", "actuate") return ctrl.Result{}, fmt.Errorf("%s actuate: %w", component.Name(), err) } + componentLogger.Info("Component actuation completed", "step", "actuate") health, err := component.Converge(ctx) - result, healthErr := manageComponentHealth(ctx, component.Name(), updateStatus, health, err) + result, healthErr := manageComponentHealth(ctx, updateStatus, health, err) if healthErr != nil { + componentLogger.Error(healthErr, "Failed updating status from component health", "step", "status") return result, healthErr } if err != nil { + componentLogger.Error(err, "Component convergence failed", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) return health.Result, fmt.Errorf("%s converge: %w", component.Name(), err) } if isPendingState(health.State) { + componentLogger.Info("Component convergence pending", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase, + "requeueAfter", health.Result.RequeueAfter) return health.Result, nil } + componentLogger.Info("Component convergence ready", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) + if result != (ctrl.Result{}) { + componentLogger.Info("Component requested explicit result", + "step", "converge", + "requeue", result.Requeue, + "requeueAfter", result.RequeueAfter) + return result, nil + } return result, nil } @@ -224,13 +259,10 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
func manageComponentHealth( ctx context.Context, - componentName string, updateStatus func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error, health componentHealth, syncErr error, ) (ctrl.Result, error) { - logger := log.FromContext(ctx) - if syncErr != nil { if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { if apierrors.IsConflict(statusErr) { @@ -242,12 +274,6 @@ func manageComponentHealth( } if isPendingState(health.State) { - logger.Info("Component sync pending", - "component", componentName, - "requeueAfter", health.Result.RequeueAfter, - "condition", health.Condition, - "reason", health.Reason, - "phase", health.Phase) if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { if apierrors.IsConflict(statusErr) { return ctrl.Result{Requeue: true}, nil @@ -256,12 +282,6 @@ func manageComponentHealth( } return health.Result, nil } - - logger.Info("Component sync ready", - "component", componentName, - "condition", health.Condition, - "reason", health.Reason, - "phase", health.Phase) return ctrl.Result{}, nil } @@ -1489,15 +1509,12 @@ func removeOwnerRef(scheme *runtime.Scheme, owner, obj client.Object) (bool, err // patchObject patches obj from original; treats NotFound as a no-op. func patchObject(ctx context.Context, c client.Client, original, obj client.Object, kind objectKind) error { - logger := log.FromContext(ctx) if err := c.Patch(ctx, obj, client.MergeFrom(original)); err != nil { if apierrors.IsNotFound(err) { - logger.Info("Object not found, skipping patch", "kind", kind, "name", obj.GetName()) return nil } return fmt.Errorf("patching %s: %w", kind, err) } - logger.Info("Object patched", "kind", kind, "name", obj.GetName()) return nil } From 5bd09cd4aa62c6af682d85c622f0bf5a679aeced Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 13 Apr 2026 11:07:35 +0200 Subject: [PATCH 06/10] cluster unit adj --- .../cluster/core/cluster_unit_test.go | 127 ++++++++++++++---- 1 file changed, 104 insertions(+), 23 deletions(-) diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index a90e3b19b..3b97de409 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -1161,6 +1161,8 @@ func TestComponentStateTriggerConditions(t *testing.T) { ctx := t.Context() scheme := runtime.NewScheme() require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) exampleCm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ @@ -1199,6 +1201,47 @@ func TestComponentStateTriggerConditions(t *testing.T) { }, } + instances := int32(1) + version := "16" + storageSize := resource.MustParse("10Gi") + mergedConfig := &MergedConfig{ + Spec: &enterprisev4.PostgresClusterSpec{ + Instances: &instances, + PostgresVersion: &version, + Storage: &storageSize, + Resources: &corev1.ResourceRequirements{}, + PostgreSQLConfig: map[string]string{}, + PgHBA: []string{}, + }, + } + + makeReadyProvisioner := func(cluster *enterprisev4.PostgresCluster) *provisionerModel { + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name, + Namespace: cluster.Namespace, + }, + Spec: buildCNPGClusterSpec(mergedConfig, "pg1-secret"), + Status: 
cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + }, + } + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cnpg).Build() + return newProvisionerModel(c, scheme, noopEventEmitter{}, cluster, mergedConfig, "pg1-secret") + } + + makeRuntimeView := func(healthy bool) clusterRuntimeView { + if !healthy { + return provisionerRuntimeView{model: &provisionerModel{}} + } + return provisionerRuntimeView{model: &provisionerModel{ + cnpgCluster: &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + }, + }} + } + // TODO: as soon as coupling is addressed, remove this monster of a test. combinations := []struct { name string @@ -1208,33 +1251,51 @@ func TestComponentStateTriggerConditions(t *testing.T) { message string }{ { - name: "Provisioner ready, pooler pending, sync not successful", + name: "Provisioner ready, pooler blocked by prerequisites", components: func() []componentModel { - provisioner := newProvisionerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, "pg1-secret") - provisioner.cnpgCluster = &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}} - pooler := newPoolerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, nil, true, false) + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + cluster, + mergedConfig, + nil, + true, + true, + ) return []componentModel{provisioner, pooler} }(), - requeue: []bool{false, false}, + requeue: []bool{false, true}, expectAll: false, - message: "Provisioner is ready but pooler is pending, don't fire", + message: "Provisioner ready but pooler gate is blocked until CNPG is healthy", }, { - name: "Provisioner ready, pooler ready, configMap failed, sync not successful", + name: "Provisioner ready, pooler ready, configMap pending from NotFound", components: func() []componentModel { - provisioner := newProvisionerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, "pg1-secret") - provisioner.cnpgCluster = &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}} - pooler := newPoolerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, nil, false, false) + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + cluster, + mergedConfig, + nil, + false, + false, + ) configMap := newConfigMapModel( configMapNotFoundClient{ Client: fake.NewClientBuilder(). WithScheme(scheme). 
Build(), }, - nil, + scheme, noopEventEmitter{}, - nil, - examplePgCluster.DeepCopy(), + makeRuntimeView(true), + cluster, "pg1-secret", ) return []componentModel{provisioner, pooler, configMap} @@ -1244,20 +1305,29 @@ func TestComponentStateTriggerConditions(t *testing.T) { message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", }, { - name: "Sync successful, all components ready.", + name: "Flow successful, all components ready", components: func() []componentModel { - provisioner := newProvisionerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, "pg1-secret") - provisioner.cnpgCluster = &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}} - pooler := newPoolerModel(nil, nil, noopEventEmitter{}, examplePgCluster.DeepCopy(), nil, nil, false, false) + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + cluster, + mergedConfig, + nil, + false, + false, + ) configMap := newConfigMapModel( fake.NewClientBuilder(). WithScheme(scheme). WithObjects(exampleCm). Build(), - nil, + scheme, noopEventEmitter{}, - nil, - examplePgCluster.DeepCopy(), + makeRuntimeView(true), + cluster, "pg1-secret", ) secret := newSecretModel( @@ -1265,9 +1335,9 @@ func TestComponentStateTriggerConditions(t *testing.T) { WithScheme(scheme). WithObjects(exampleSecret). Build(), - nil, + scheme, noopEventEmitter{}, - examplePgCluster.DeepCopy(), + cluster, "pg1-secret", ) return []componentModel{provisioner, pooler, configMap, secret} @@ -1284,7 +1354,18 @@ func TestComponentStateTriggerConditions(t *testing.T) { state := pgcConstants.Empty for i, check := range tt.components { - info, _ := check.Converge(ctx) + gate, gateErr := check.EvaluatePrerequisites(ctx) + require.NoError(t, gateErr) + if !gate.Allowed { + info := gate.Health + state = info.State + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + continue + } + + require.NoError(t, check.Actuate(ctx)) + info, err := check.Converge(ctx) + require.NoError(t, err) state = info.State assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) } From 489e0b97bd1da9064e18171a9801f99cb6a1d770 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 13 Apr 2026 11:47:12 +0200 Subject: [PATCH 07/10] merge adjustments --- .../postgrescluster_controller_test.go | 2 ++ .../postgresdatabase_controller_test.go | 2 ++ pkg/postgresql/cluster/core/cluster.go | 28 +------------------ 3 files changed, 5 insertions(+), 27 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 9622068b6..ee05f30af 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -36,6 +36,7 @@ import ( enterprisev4 "github.com/splunk/splunk-operator/api/v4" "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" ) /* @@ -125,6 +126,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Client: k8sClient, Scheme: k8sClient.Scheme(), Recorder: record.NewFakeRecorder(100), + Metrics: &prometheus.NoopRecorder{}, } req = reconcile.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: namespace}} }) diff --git a/internal/controller/postgresdatabase_controller_test.go 
b/internal/controller/postgresdatabase_controller_test.go index 31f591573..fb450738e 100644 --- a/internal/controller/postgresdatabase_controller_test.go +++ b/internal/controller/postgresdatabase_controller_test.go @@ -26,6 +26,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -83,6 +84,7 @@ func reconcilePostgresDatabase(ctx context.Context, nn types.NamespacedName) (ct Client: k8sClient, Scheme: k8sClient.Scheme(), Recorder: record.NewFakeRecorder(100), + Metrics: &prometheus.NoopRecorder{}, } return reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: nn}) } diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 1150945c7..513ecebcd 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -25,6 +25,7 @@ import ( password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -1239,33 +1240,6 @@ func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *ente return nil } -// syncPoolerStatus populates ConnectionPoolerStatus and the PoolerReady condition. -func syncPoolerStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster) error { - rwPooler := &cnpgv1.Pooler{} - if err := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(cluster.Name, readWriteEndpoint), - Namespace: cluster.Namespace, - }, rwPooler); err != nil { - return err - } - - roPooler := &cnpgv1.Pooler{} - if err := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(cluster.Name, readOnlyEndpoint), - Namespace: cluster.Namespace, - }, roPooler); err != nil { - return err - } - - cluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} - rwDesired, rwScheduled := poolerInstanceCount(rwPooler) - roDesired, roScheduled := poolerInstanceCount(roPooler) - - return setStatus(ctx, c, metrics, cluster, poolerReady, metav1.ConditionTrue, reasonAllInstancesReady, - fmt.Sprintf("%s: %d/%d, %s: %d/%d", readWriteEndpoint, rwScheduled, rwDesired, readOnlyEndpoint, roScheduled, roDesired), - readyClusterPhase) -} - // setStatus sets the phase, condition and persists the status. // It skips the API write when the resulting status is identical to the current // state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters. 
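Patch 08 below re-introduces event emission and asserts on it by draining the controller's record.FakeRecorder. Shown here as a standalone sketch with hypothetical helper names (drainEvents, hasEvent): FakeRecorder buffers events on its exported Events channel, so a test can empty the channel on each retry and match on the event type and reason substrings. The suite's own containsEvent helper in the next patch serves the same purpose inside Ginkgo's Eventually.

package sketch

import (
	"strings"

	"k8s.io/client-go/tools/record"
)

// drainEvents empties the recorder's buffered channel and returns everything
// seen so far; callers can invoke it again inside an Eventually-style retry loop.
func drainEvents(recorder *record.FakeRecorder) []string {
	var events []string
	for {
		select {
		case e := <-recorder.Events:
			events = append(events, e)
		default:
			return events
		}
	}
}

// hasEvent reports whether any recorded event mentions both the type
// (e.g. "Normal" or "Warning") and the reason substring.
func hasEvent(events []string, eventType, reason string) bool {
	for _, e := range events {
		if strings.Contains(e, eventType) && strings.Contains(e, reason) {
			return true
		}
	}
	return false
}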
From 7663e101c353e1b02b0656674217ba433087aad3 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 13 Apr 2026 12:51:51 +0200 Subject: [PATCH 08/10] event emmision placed back, fortified with tests --- .../postgrescluster_controller_test.go | 35 +++++++++++++++++-- pkg/postgresql/cluster/core/cluster.go | 13 ++++++- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index ee05f30af..8aad8d0b6 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -19,7 +19,9 @@ package controller import ( "context" "fmt" + "strings" + v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" @@ -52,6 +54,18 @@ import ( * PC-09 ignores no-op updates */ +func containsEvent(events []string, recorder *record.FakeRecorder, eventType string, event string) bool { + for { + select { + case e := <-recorder.Events: + events = append(events, e) + return strings.Contains(e, eventType) && strings.Contains(e, event) + default: + return false + } + } +} + var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { const ( @@ -77,6 +91,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { pgClusterClassKey types.NamespacedName reconciler *PostgresClusterReconciler req reconcile.Request + fakeRecorder *record.FakeRecorder ) reconcileNTimes := func(times int) { @@ -121,11 +136,11 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { ClusterDeletionPolicy: &[]string{deletePolicy}[0], }, } - + fakeRecorder = record.NewFakeRecorder(100) reconciler = &PostgresClusterReconciler{ Client: k8sClient, Scheme: k8sClient.Scheme(), - Recorder: record.NewFakeRecorder(100), + Recorder: fakeRecorder, Metrics: &prometheus.NoopRecorder{}, } req = reconcile.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: namespace}} @@ -222,6 +237,13 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(pc.Status.Resources).NotTo(BeNil()) Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) + + received := make([]string, 0, 8) + Eventually(func() bool { + return containsEvent( + received, fakeRecorder, + v1.EventTypeNormal, core.EventClusterReady) + }, "5s", "100ms").Should(BeTrue(), "events seen: %v", received) }) // PC-07 @@ -290,7 +312,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { When("reconciling with invalid or drifted dependencies", func() { // PC-05 Context("when referenced class does not exist", func() { - It("fails with class-not-found condition", func() { + It("fails with class-not-found condition and emits a warning event", func() { badName := "bad-" + clusterName badKey := types.NamespacedName{Name: badName, Namespace: namespace} @@ -315,6 +337,13 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cond := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") return cond != nil && cond.Reason == "ClusterClassNotFound" }, "20s", "250ms").Should(BeTrue()) + + received := make([]string, 0, 8) + Eventually(func() bool { + return containsEvent( + received, fakeRecorder, + v1.EventTypeWarning, core.EventClusterClassNotFound) + }, "5s", "100ms").Should(BeTrue(), "events seen: %v", received) }) }) diff --git 
a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 513ecebcd..d403c9ea4 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -211,7 +211,6 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if result != (ctrl.Result{}) { componentLogger.Info("Component requested explicit result", "step", "converge", - "requeue", result.Requeue, "requeueAfter", result.RequeueAfter) return result, nil } @@ -249,12 +248,21 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. rc.emitPoolerReadyTransition(postgresCluster, oldConditions) logger.Info("Reconciliation complete") + var oldPhase string + if postgresCluster.Status.Phase != nil { + oldPhase = *postgresCluster.Status.Phase + } if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { if apierrors.IsConflict(err) { return ctrl.Result{Requeue: true}, nil } return ctrl.Result{}, err } + var newPhase string + if postgresCluster.Status.Phase != nil { + newPhase = *postgresCluster.Status.Phase + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase) return ctrl.Result{}, nil } @@ -659,6 +667,7 @@ func (p *poolerModel) Converge(ctx context.Context) (componentHealth, error) { // TODO: Port material. rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) if err != nil { + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) p.health.State = pgcConstants.Failed p.health.Reason = reasonPoolerReconciliationFailed p.health.Message = fmt.Sprintf("Failed to check RW pooler existence: %v", err) @@ -668,6 +677,7 @@ func (p *poolerModel) Converge(ctx context.Context) (componentHealth, error) { roExists, err := poolerExists(ctx, p.client, p.cluster, readOnlyEndpoint) if err != nil { + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) p.health.State = pgcConstants.Failed p.health.Reason = reasonPoolerReconciliationFailed p.health.Message = fmt.Sprintf("Failed to check RO pooler existence: %v", err) @@ -766,6 +776,7 @@ func (c *configMapModel) Actuate(ctx context.Context) error { } desiredCM, err := generateConfigMap(ctx, c.client, c.scheme, c.cluster, cnpgCluster, c.secret) if err != nil { + c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) return err } cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} From cc4f6b9a83d3266dae747fc2dbc50bd3eceadce9 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Tue, 14 Apr 2026 12:39:41 +0200 Subject: [PATCH 09/10] align with requirements on state building --- .../postgrescluster_controller_test.go | 41 ++ pkg/postgresql/cluster/core/cluster.go | 520 ++++++++++++++---- .../cluster/core/cluster_unit_test.go | 345 +++++++++++- pkg/postgresql/cluster/core/types.go | 11 +- .../core/types/constants/components.go | 9 +- pkg/postgresql/database/core/types.go | 10 +- 6 files changed, 816 insertions(+), 120 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 8aad8d0b6..890f69f04 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -134,6 +134,10 @@ var _ =
Describe("PostgresCluster Controller", Label("postgres"), func() { Spec: enterprisev4.PostgresClusterSpec{ Class: className, ClusterDeletionPolicy: &[]string{deletePolicy}[0], + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_user_rw", Exists: true}, + }, }, } fakeRecorder = record.NewFakeRecorder(100) @@ -225,6 +229,11 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cnpg := &cnpgv1.Cluster{} Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) cnpg.Status.Phase = cnpgv1.PhaseHealthy + cnpg.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + } Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) reconcileNTimes(1) @@ -234,6 +243,38 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond).NotTo(BeNil()) Expect(cond.Status).To(Equal(metav1.ConditionTrue)) Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) + + secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") + Expect(secretCond).NotTo(BeNil()) + Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(secretCond.Reason).To(Equal("ClusterBuildSucceeded")) + + configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") + Expect(configMapCond).NotTo(BeNil()) + Expect(configMapCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(configMapCond.Reason).To(Equal("ClusterBuildSucceeded")) + + managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") + Expect(managedRolesCond).NotTo(BeNil()) + Expect(managedRolesCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(managedRolesCond.Reason).To(Equal("ManagedRolesReady")) + + // Pooler is disabled in this suite fixture, but converge publishes PoolerReady=True with disabled message. + poolerCond := meta.FindStatusCondition(pc.Status.Conditions, "PoolerReady") + Expect(poolerCond).NotTo(BeNil()) + Expect(poolerCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(poolerCond.Reason).To(Equal("AllInstancesReady")) + Expect(poolerCond.Message).To(Equal("Connection pooler disabled")) + + Expect(pc.Status.ManagedRolesStatus).NotTo(BeNil()) + Expect(pc.Status.ManagedRolesStatus.Reconciled).To(ContainElements("app_user", "app_user_rw")) + + Expect(pc.Status.Phase).NotTo(BeNil()) + Expect(*pc.Status.Phase).To(Equal("Ready")) + Expect(pc.Status.ProvisionerRef).NotTo(BeNil()) + Expect(pc.Status.ProvisionerRef.Kind).To(Equal("Cluster")) + Expect(pc.Status.ProvisionerRef.Name).To(Equal(clusterName)) + Expect(pc.Status.Resources).NotTo(BeNil()) Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index d403c9ea4..ab3cb2a82 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -20,6 +20,8 @@ import ( "context" "errors" "fmt" + "sort" + "strings" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" password "github.com/sethvargo/go-password/password" @@ -68,8 +70,36 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
logger = logger.WithValues("postgresCluster", postgresCluster.Name) ctx = log.IntoContext(ctx, logger) + currentPhase := func() string { + if postgresCluster.Status.Phase == nil { + return "" + } + return *postgresCluster.Status.Phase + } + updateStatus := func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { - return setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase) + oldPhase := currentPhase() + if err := setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil + } + updateComponentHealthStatus := func(health componentHealth) error { + oldPhase := currentPhase() + if err := setStatusFromHealth(ctx, c, rc.Metrics, postgresCluster, health); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil + } + updatePhaseStatus := func(phase reconcileClusterPhases) error { + oldPhase := currentPhase() + if err := setPhaseStatus(ctx, c, postgresCluster, phase); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil } // Finalizer handling must come before any other processing. @@ -144,15 +174,22 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil + secretComponent := newSecretModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, postgresSecretName) + provisionerComponent := newProvisionerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, mergedConfig, postgresSecretName) + components := []componentModel{ - newSecretModel(c, rc.Scheme, rc, postgresCluster, postgresSecretName), - newProvisionerModel(c, rc.Scheme, rc, postgresCluster, mergedConfig, postgresSecretName), + secretComponent, + provisionerComponent, } phase := func(component componentModel) (ctrl.Result, error) { componentLogger := logger.WithValues("component", component.Name()) gate, gateErr := component.EvaluatePrerequisites(ctx) if gateErr != nil { + if isTransientError(gateErr) { + componentLogger.Error(gateErr, "Component prerequisite transient error, requeueing", "step", "prerequisites") + return transientResult(gateErr), nil + } componentLogger.Error(gateErr, "Component prerequisite evaluation failed", "step", "prerequisites") return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", component.Name(), gateErr) } @@ -163,29 +200,33 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
"reason", gate.Health.Reason, "phase", gate.Health.Phase, "requeueAfter", gate.Health.Result.RequeueAfter) - result, healthErr := manageComponentHealth(ctx, updateStatus, gate.Health, nil) - if healthErr != nil { - componentLogger.Error(healthErr, "Failed updating status from prerequisite gate", "step", "status") - return result, healthErr + health, err := component.Converge(ctx) + if err != nil && isTransientError(err) { + return transientResult(err), nil } - if result != (ctrl.Result{}) { - return result, nil + if err != nil { + componentLogger.Error(err, "Blocked component convergence failed", "step", "converge") + return health.Result, fmt.Errorf("%s converge (blocked): %w", component.Name(), err) } - return gate.Health.Result, nil + return health.Result, nil } if err := component.Actuate(ctx); err != nil { + if isTransientError(err) { + componentLogger.Error(err, "Component actuation transient error, requeueing", "step", "actuate") + return transientResult(err), nil + } componentLogger.Error(err, "Component actuation failed", "step", "actuate") return ctrl.Result{}, fmt.Errorf("%s actuate: %w", component.Name(), err) } componentLogger.Info("Component actuation completed", "step", "actuate") health, err := component.Converge(ctx) - result, healthErr := manageComponentHealth(ctx, updateStatus, health, err) - if healthErr != nil { - componentLogger.Error(healthErr, "Failed updating status from component health", "step", "status") - return result, healthErr + if err != nil && isTransientError(err) { + componentLogger.Error(err, "Component convergence transient error, requeueing", "step", "converge") + return transientResult(err), nil } + if err != nil { componentLogger.Error(err, "Component convergence failed", "step", "converge", @@ -194,7 +235,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. "phase", health.Phase) return health.Result, fmt.Errorf("%s converge: %w", component.Name(), err) } - if isPendingState(health.State) { + if isIntermediateState(health.State) { componentLogger.Info("Component convergence pending", "step", "converge", "condition", health.Condition, @@ -208,13 +249,13 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. "condition", health.Condition, "reason", health.Reason, "phase", health.Phase) - if result != (ctrl.Result{}) { + if health.Result != (ctrl.Result{}) { componentLogger.Info("Component requested explicit result", "step", "converge", - "requeueAfter", result.RequeueAfter) - return result, nil + "requeueAfter", health.Result.RequeueAfter) + return health.Result, nil } - return result, nil + return ctrl.Result{}, nil } for _, component := range components { @@ -227,13 +268,13 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } } - provisionerComponent := components[1].(*provisionerModel) cnpgCluster = provisionerComponent.cnpgCluster runtimeView := provisionerRuntimeView{model: provisionerComponent} oldConditions := append([]metav1.Condition(nil), postgresCluster.Status.Conditions...) 
runtimeComponents := []componentModel{ - newPoolerModel(c, rc.Scheme, rc, postgresCluster, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), - newConfigMapModel(c, rc.Scheme, rc, runtimeView, postgresCluster, postgresSecretName), + newManagedRolesModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), + newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), } for _, component := range runtimeComponents { @@ -248,50 +289,34 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. rc.emitPoolerReadyTransition(postgresCluster, oldConditions) logger.Info("Reconciliation complete") - var oldPhase string - if postgresCluster.Status.Phase != nil { - oldPhase = *postgresCluster.Status.Phase - } - if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { + if err := updatePhaseStatus(readyClusterPhase); err != nil { if apierrors.IsConflict(err) { return ctrl.Result{Requeue: true}, nil } return ctrl.Result{}, err } - var newPhase string - if postgresCluster.Status.Phase != nil { - newPhase = *postgresCluster.Status.Phase - } - rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase) return ctrl.Result{}, nil } -func manageComponentHealth( - ctx context.Context, - updateStatus func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error, - health componentHealth, - syncErr error, -) (ctrl.Result, error) { - if syncErr != nil { - if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { - if apierrors.IsConflict(statusErr) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, statusErr - } - return health.Result, syncErr +func isTransientError(err error) bool { + return apierrors.IsConflict(err) || + apierrors.IsServerTimeout(err) || + apierrors.IsTooManyRequests(err) || + apierrors.IsTimeout(err) +} + +func transientResult(err error) ctrl.Result { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true} } + return ctrl.Result{RequeueAfter: retryDelay} +} - if isPendingState(health.State) { - if statusErr := updateStatus(health.Condition, metav1.ConditionFalse, health.Reason, health.Message, health.Phase); statusErr != nil { - if apierrors.IsConflict(statusErr) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, statusErr - } - return health.Result, nil +func writeComponentStatus(updateStatus healthStatusUpdater, health componentHealth) error { + if updateStatus == nil { + return nil } - return ctrl.Result{}, nil + return updateStatus(health) } // types/dto candidate @@ -336,6 +361,8 @@ type prerequisiteDecision struct { Health componentHealth } +type healthStatusUpdater func(health componentHealth) error + type eventEmitter interface { emitNormal(obj client.Object, reason, message string) emitWarning(obj client.Object, reason, message string) @@ -362,38 +389,37 @@ type provisionerModel struct { client client.Client scheme *runtime.Scheme events eventEmitter + updateStatus healthStatusUpdater cluster *enterprisev4.PostgresCluster mergedConfig *MergedConfig secretName string cnpgCluster *cnpgv1.Cluster + cnpgCreated 
bool + cnpgPatched bool health componentHealth } -func newProvisionerModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, secretName string) *provisionerModel { - return &provisionerModel{client: c, scheme: scheme, events: events, cluster: cluster, mergedConfig: mergedConfig, secretName: secretName} +func newProvisionerModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, secretName string) *provisionerModel { + return &provisionerModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, cluster: cluster, mergedConfig: mergedConfig, secretName: secretName} } func (p *provisionerModel) Name() string { return pgcConstants.ComponentProvisioner } func (p *provisionerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { - if p.cluster.Status.Resources == nil || p.cluster.Status.Resources.SuperUserSecretRef == nil { + if health, missing := p.getHealthOnMissingSecretRef(); missing { return prerequisiteDecision{ Allowed: false, - Health: componentHealth{ - State: pgcConstants.Pending, - Condition: clusterReady, - Reason: reasonUserSecretFailed, - Message: msgSecretRefNotPublished, - Phase: pendingClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, + Health: health, }, nil } return prerequisiteDecision{Allowed: true}, nil } func (p *provisionerModel) Actuate(ctx context.Context) error { + p.cnpgCreated = false + p.cnpgPatched = false + desiredSpec := buildCNPGClusterSpec(p.mergedConfig, p.secretName) existingCNPG := &cnpgv1.Cluster{} err := p.client.Get(ctx, types.NamespacedName{Name: p.cluster.Name, Namespace: p.cluster.Namespace}, existingCNPG) @@ -406,6 +432,7 @@ func (p *provisionerModel) Actuate(ctx context.Context) error { } p.events.emitNormal(p.cluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") p.cnpgCluster = newCluster + p.cnpgCreated = true case err != nil: return err default: @@ -420,10 +447,7 @@ func (p *provisionerModel) Actuate(ctx context.Context) error { return patchErr } p.events.emitNormal(p.cluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") - } - if rolesErr := reconcileManagedRoles(ctx, p.client, p.cluster, p.cnpgCluster); rolesErr != nil { - p.events.emitWarning(p.cluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr)) - return rolesErr + p.cnpgPatched = true } } @@ -439,8 +463,24 @@ func (p *provisionerModel) Actuate(ctx context.Context) error { return nil } -func (p *provisionerModel) Converge(_ context.Context) (componentHealth, error) { +func (p *provisionerModel) Converge(_ context.Context) (health componentHealth, err error) { p.health.Condition = clusterReady + defer func() { + statusErr := writeComponentStatus(p.updateStatus, p.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = p.health + }() + + if missingHealth, missing := p.getHealthOnMissingSecretRef(); missing { + p.health = missingHealth + return p.health, nil + } if p.cnpgCluster == nil { p.health.State = pgcConstants.Pending @@ -451,6 +491,24 @@ func (p *provisionerModel) Converge(_ context.Context) (componentHealth, error) return p.health, nil } + if p.cnpgCreated { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + 
p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + if p.cnpgPatched { + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + switch p.cnpgCluster.Status.Phase { case cnpgv1.PhaseHealthy: p.health.State = pgcConstants.Ready @@ -560,10 +618,193 @@ func (p *provisionerModel) Converge(_ context.Context) (componentHealth, error) } } +func (p *provisionerModel) getHealthOnMissingSecretRef() (componentHealth, bool) { + if p.cluster.Status.Resources == nil || p.cluster.Status.Resources.SuperUserSecretRef == nil { + return componentHealth{ + State: pgcConstants.Pending, + Condition: clusterReady, + Reason: reasonUserSecretPending, + Message: msgSecretRefNotPublished, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, true + } + return componentHealth{}, false +} + +type managedRolesModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + runtime clusterRuntimeView + cluster *enterprisev4.PostgresCluster + secret string + + health componentHealth +} + +func newManagedRolesModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *managedRolesModel { + return &managedRolesModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, runtime: runtime, cluster: cluster, secret: secret} +} + +func (m *managedRolesModel) Name() string { return pgcConstants.ComponentManagedRoles } + +func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if m.runtime == nil || !m.runtime.IsHealthy() { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Failed, + Condition: managedRolesReady, + Reason: reasonManagedRolesFailed, + Message: "Managed roles blocked until CNPG cluster is healthy", + Phase: failedClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (m *managedRolesModel) Actuate(ctx context.Context) error { + if rolesErr := reconcileManagedRoles(ctx, m.client, m.cluster, m.runtime.Cluster()); rolesErr != nil { + m.events.emitWarning(m.cluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr)) + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr) + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{} + return rolesErr + } + return nil +} + +func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealth, err error) { + _ = ctx + m.health = componentHealth{Condition: managedRolesReady} + defer func() { + statusErr := writeComponentStatus(m.updateStatus, m.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = m.health + }() + + if m.runtime == nil || !m.runtime.IsHealthy() { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = 
"Managed roles blocked until CNPG cluster is healthy" + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return m.health, fmt.Errorf("managed roles blocked until CNPG cluster is healthy") + } + + syncManagedRolesStatusFromCNPG(m.cluster, m.runtime.Cluster()) + status := m.cluster.Status.ManagedRolesStatus + if status == nil { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = "Managed roles status not published yet" + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return m.health, fmt.Errorf("managed roles status not published") + } + + if len(status.Failed) > 0 { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = fmt.Sprintf("Managed roles reconciliation failed for %d role(s)", len(status.Failed)) + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return m.health, fmt.Errorf("managed roles have failed entries") + } + + if len(status.Pending) > 0 { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = fmt.Sprintf("Managed roles pending for %d role(s)", len(status.Pending)) + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return m.health, fmt.Errorf("managed roles have pending entries") + } + + m.health.State = pgcConstants.Ready + m.health.Reason = reasonManagedRolesReady + m.health.Message = "Managed roles are reconciled" + m.health.Phase = readyClusterPhase + m.health.Result = ctrl.Result{} + return m.health, nil +} + +// TODO: Ports as access to cnpg originated info to decouple. +func syncManagedRolesStatusFromCNPG(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) { + if cluster == nil || cnpgCluster == nil { + return + } + + expectedRoles := make([]string, 0, len(cluster.Spec.ManagedRoles)) + for _, role := range cluster.Spec.ManagedRoles { + expectedRoles = append(expectedRoles, role.Name) + } + + cnpgStatus := cnpgCluster.Status.ManagedRolesStatus + reconciled := append([]string(nil), cnpgStatus.ByStatus[cnpgv1.RoleStatusReconciled]...) + pending := append([]string(nil), cnpgStatus.ByStatus[cnpgv1.RoleStatusPendingReconciliation]...) 
+ + reconciledSet := make(map[string]struct{}, len(reconciled)) + for _, roleName := range reconciled { + reconciledSet[roleName] = struct{}{} + } + pendingSet := make(map[string]struct{}, len(pending)) + for _, roleName := range pending { + pendingSet[roleName] = struct{}{} + } + + failed := make(map[string]string, len(cnpgStatus.CannotReconcile)) + for roleName, errs := range cnpgStatus.CannotReconcile { + if len(errs) == 0 { + failed[roleName] = "role cannot be reconciled" + continue + } + failed[roleName] = strings.Join(errs, "; ") + } + + for _, roleName := range expectedRoles { + if _, ok := reconciledSet[roleName]; ok { + continue + } + if _, ok := failed[roleName]; ok { + continue + } + if _, ok := pendingSet[roleName]; ok { + continue + } + pending = append(pending, roleName) + } + + sort.Strings(reconciled) + sort.Strings(pending) + if len(failed) == 0 { + failed = nil + } + + cluster.Status.ManagedRolesStatus = &enterprisev4.ManagedRolesStatus{ + Reconciled: reconciled, + Pending: pending, + Failed: failed, + } +} + type poolerModel struct { client client.Client scheme *runtime.Scheme events eventEmitter + updateStatus healthStatusUpdater cluster *enterprisev4.PostgresCluster mergedConfig *MergedConfig cnpgCluster *cnpgv1.Cluster @@ -573,11 +814,12 @@ type poolerModel struct { health componentHealth } -func newPoolerModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { +func newPoolerModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { return &poolerModel{ client: c, scheme: scheme, events: events, + updateStatus: updateStatus, cluster: cluster, mergedConfig: mergedConfig, cnpgCluster: cnpgCluster, @@ -644,8 +886,19 @@ func (p *poolerModel) Actuate(ctx context.Context) error { } } -func (p *poolerModel) Converge(ctx context.Context) (componentHealth, error) { +func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err error) { p.health = componentHealth{Condition: poolerReady} + defer func() { + statusErr := writeComponentStatus(p.updateStatus, p.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = p.health + }() if !p.poolerEnabled { p.health.State = pgcConstants.Ready @@ -663,6 +916,22 @@ func (p *poolerModel) Converge(ctx context.Context) (componentHealth, error) { p.health.Result = ctrl.Result{} return p.health, fmt.Errorf("pooler config missing") } + if p.cnpgCluster == nil { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } // TODO: Port material. 
rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) @@ -727,6 +996,7 @@ func (p *poolerModel) Converge(ctx context.Context) (componentHealth, error) { return p.health, nil } + p.cluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} p.health.State = pgcConstants.Ready p.health.Reason = reasonAllInstancesReady p.health.Message = msgPoolersReady @@ -736,18 +1006,19 @@ func (p *poolerModel) Converge(ctx context.Context) (componentHealth, error) { } type configMapModel struct { - client client.Client - scheme *runtime.Scheme - events eventEmitter - runtime clusterRuntimeView - cluster *enterprisev4.PostgresCluster - secret string + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + runtime clusterRuntimeView + cluster *enterprisev4.PostgresCluster + secret string health componentHealth } -func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *configMapModel { - return &configMapModel{client: c, scheme: scheme, events: events, runtime: runtime, cluster: cluster, secret: secret} +func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *configMapModel { + return &configMapModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, runtime: runtime, cluster: cluster, secret: secret} } func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } @@ -758,7 +1029,7 @@ func (c *configMapModel) EvaluatePrerequisites(_ context.Context) (prerequisiteD Allowed: false, Health: componentHealth{ State: pgcConstants.Provisioning, - Condition: clusterReady, + Condition: configMapsReady, Reason: reasonCNPGProvisioning, Message: msgCNPGPendingCreation, Phase: provisioningClusterPhase, @@ -806,8 +1077,28 @@ func (c *configMapModel) Actuate(ctx context.Context) error { return nil } -func (c *configMapModel) Converge(ctx context.Context) (componentHealth, error) { - c.health = componentHealth{Condition: clusterReady} +func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, err error) { + c.health = componentHealth{Condition: configMapsReady} + defer func() { + statusErr := writeComponentStatus(c.updateStatus, c.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = c.health + }() + + if c.runtime == nil || !c.runtime.IsHealthy() { + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonCNPGProvisioning + c.health.Message = msgCNPGPendingCreation + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil + } if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { c.health.State = pgcConstants.Provisioning @@ -863,17 +1154,18 @@ func (c *configMapModel) Converge(ctx context.Context) (componentHealth, error) } type secretModel struct { - client client.Client - scheme *runtime.Scheme - events eventEmitter - cluster *enterprisev4.PostgresCluster - name string + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + cluster *enterprisev4.PostgresCluster + name string health componentHealth } -func newSecretModel(c client.Client, scheme *runtime.Scheme, 
events eventEmitter, cluster *enterprisev4.PostgresCluster, name string) *secretModel { - return &secretModel{client: c, scheme: scheme, events: events, cluster: cluster, name: name} +func newSecretModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, name string) *secretModel { + return &secretModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, cluster: cluster, name: name} } func (s *secretModel) Name() string { return pgcConstants.ComponentSecret } @@ -920,12 +1212,23 @@ func (s *secretModel) Actuate(ctx context.Context) error { return nil } -func (s *secretModel) Converge(ctx context.Context) (componentHealth, error) { - s.health = componentHealth{Condition: clusterReady} +func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err error) { + s.health = componentHealth{Condition: secretsReady} + defer func() { + statusErr := writeComponentStatus(s.updateStatus, s.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = s.health + }() if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { s.health.State = pgcConstants.Provisioning - s.health.Reason = reasonUserSecretFailed + s.health.Reason = reasonUserSecretPending s.health.Message = msgSecretRefNotPublished s.health.Phase = provisioningClusterPhase s.health.Result = ctrl.Result{RequeueAfter: retryDelay} @@ -937,7 +1240,7 @@ func (s *secretModel) Converge(ctx context.Context) (componentHealth, error) { if err := s.client.Get(ctx, key, secret); err != nil { if apierrors.IsNotFound(err) { s.health.State = pgcConstants.Provisioning - s.health.Reason = reasonUserSecretFailed + s.health.Reason = reasonUserSecretPending s.health.Message = msgSecretNotFoundYet s.health.Phase = provisioningClusterPhase s.health.Result = ctrl.Result{RequeueAfter: retryDelay} @@ -972,7 +1275,7 @@ func (s *secretModel) Converge(ctx context.Context) (componentHealth, error) { return s.health, nil } -func isPendingState(state pgcConstants.State) bool { +func isIntermediateState(state pgcConstants.State) bool { switch state { case pgcConstants.Pending, pgcConstants.Provisioning, @@ -1271,7 +1574,9 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } - metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + if metrics != nil { + metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + } if err := c.Status().Update(ctx, cluster); err != nil { return fmt.Errorf("failed to update PostgresCluster status: %w", err) @@ -1279,6 +1584,27 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } +func setStatusFromHealth(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster, health componentHealth) error { + conditionStatus := metav1.ConditionFalse + if health.State == pgcConstants.Ready { + conditionStatus = metav1.ConditionTrue + } + return setStatus(ctx, c, metrics, cluster, health.Condition, conditionStatus, health.Reason, health.Message, health.Phase) +} + +func setPhaseStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, phase reconcileClusterPhases) error { + before := cluster.Status.DeepCopy() + p := string(phase) + cluster.Status.Phase = &p + if 
equality.Semantic.DeepEqual(*before, cluster.Status) { + return nil + } + if err := c.Status().Update(ctx, cluster); err != nil { + return fmt.Errorf("failed to update PostgresCluster status phase: %w", err) + } + return nil +} + // generateConfigMap builds a ConfigMap with connection details for the PostgresCluster. func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, secretName string) (*corev1.ConfigMap, error) { cmName := fmt.Sprintf("%s%s", cluster.Name, defaultConfigMapSuffix) diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 3b97de409..c9d255fe8 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -1206,8 +1206,8 @@ func TestComponentStateTriggerConditions(t *testing.T) { storageSize := resource.MustParse("10Gi") mergedConfig := &MergedConfig{ Spec: &enterprisev4.PostgresClusterSpec{ - Instances: &instances, - PostgresVersion: &version, + Instances: &instances, + PostgresVersion: &version, Storage: &storageSize, Resources: &corev1.ResourceRequirements{}, PostgreSQLConfig: map[string]string{}, @@ -1227,7 +1227,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { }, } c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cnpg).Build() - return newProvisionerModel(c, scheme, noopEventEmitter{}, cluster, mergedConfig, "pg1-secret") + return newProvisionerModel(c, scheme, noopEventEmitter{}, nil, cluster, mergedConfig, "pg1-secret") } makeRuntimeView := func(healthy bool) clusterRuntimeView { @@ -1246,6 +1246,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { combinations := []struct { name string components []componentModel + conditions []conditionTypes requeue []bool expectAll bool message string @@ -1259,6 +1260,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { fake.NewClientBuilder().WithScheme(scheme).Build(), scheme, noopEventEmitter{}, + nil, cluster, mergedConfig, nil, @@ -1267,9 +1269,10 @@ func TestComponentStateTriggerConditions(t *testing.T) { ) return []componentModel{provisioner, pooler} }(), - requeue: []bool{false, true}, - expectAll: false, - message: "Provisioner ready but pooler gate is blocked until CNPG is healthy", + conditions: []conditionTypes{clusterReady, poolerReady}, + requeue: []bool{false, true}, + expectAll: false, + message: "Provisioner ready but pooler gate is blocked until CNPG is healthy", }, { name: "Provisioner ready, pooler ready, configMap pending from NotFound", @@ -1280,6 +1283,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { fake.NewClientBuilder().WithScheme(scheme).Build(), scheme, noopEventEmitter{}, + nil, cluster, mergedConfig, nil, @@ -1294,15 +1298,17 @@ func TestComponentStateTriggerConditions(t *testing.T) { }, scheme, noopEventEmitter{}, + nil, makeRuntimeView(true), cluster, "pg1-secret", ) return []componentModel{provisioner, pooler, configMap} }(), - requeue: []bool{false, false, true}, - expectAll: false, - message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", + conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady}, + requeue: []bool{false, false, true}, + expectAll: false, + message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", }, { name: "Flow successful, all components ready", @@ -1313,6 +1319,7 @@ func 
TestComponentStateTriggerConditions(t *testing.T) { fake.NewClientBuilder().WithScheme(scheme).Build(), scheme, noopEventEmitter{}, + nil, cluster, mergedConfig, nil, @@ -1326,6 +1333,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { Build(), scheme, noopEventEmitter{}, + nil, makeRuntimeView(true), cluster, "pg1-secret", @@ -1337,14 +1345,16 @@ func TestComponentStateTriggerConditions(t *testing.T) { Build(), scheme, noopEventEmitter{}, + nil, cluster, "pg1-secret", ) return []componentModel{provisioner, pooler, configMap, secret} }(), - requeue: []bool{false, false, false, false}, - expectAll: true, - message: "", + conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady, secretsReady}, + requeue: []bool{false, false, false, false}, + expectAll: true, + message: "", }, } @@ -1359,6 +1369,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { if !gate.Allowed { info := gate.Health state = info.State + assert.Equal(t, tt.conditions[i], info.Condition) assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) continue } @@ -1367,6 +1378,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { info, err := check.Converge(ctx) require.NoError(t, err) state = info.State + assert.Equal(t, tt.conditions[i], info.Condition) assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) } assert.Equal(t, tt.expectAll, state&pgcConstants.Ready == pgcConstants.Ready, @@ -1374,3 +1386,312 @@ func TestComponentStateTriggerConditions(t *testing.T) { }) } } + +func TestSyncManagedRolesStatusFromCNPG(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + specRoles []enterprisev4.ManagedRole + cnpgStatus cnpgv1.ManagedRoles + reconciled []string + pending []string + failed map[string]string + }{ + { + name: "marks unreconciled desired role as pending", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{}, + reconciled: nil, + pending: []string{"app_user"}, + failed: nil, + }, + { + name: "maps reconciled and pending roles from CNPG status", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_rw", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user"}, + cnpgv1.RoleStatusPendingReconciliation: {"app_rw"}, + }, + }, + reconciled: []string{"app_user"}, + pending: []string{"app_rw"}, + failed: nil, + }, + { + name: "maps cannot reconcile errors as failed", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{ + CannotReconcile: map[string][]string{ + "app_user": {"reserved role"}, + }, + }, + reconciled: nil, + pending: nil, + failed: map[string]string{ + "app_user": "reserved role", + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: tt.specRoles, + }, + } + cnpgCluster := &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + ManagedRolesStatus: tt.cnpgStatus, + }, + } + + syncManagedRolesStatusFromCNPG(cluster, cnpgCluster) + + require.NotNil(t, cluster.Status.ManagedRolesStatus) + assert.Equal(t, tt.reconciled, cluster.Status.ManagedRolesStatus.Reconciled) + assert.Equal(t, tt.pending, cluster.Status.ManagedRolesStatus.Pending) + assert.Equal(t, tt.failed, cluster.Status.ManagedRolesStatus.Failed) + }) + } +} + +func 
TestManagedRolesModelConverge(t *testing.T) { + t.Parallel() + + makeRuntimeView := func(phase string, managedRoles cnpgv1.ManagedRoles) clusterRuntimeView { + return provisionerRuntimeView{model: &provisionerModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: phase, + ManagedRolesStatus: managedRoles, + }, + }, + }} + } + + tests := []struct { + name string + runtimeView clusterRuntimeView + specRoles []enterprisev4.ManagedRole + expectedState pgcConstants.State + expectedReason conditionReasons + expectErr bool + expectStatusPublished bool + expectPending []string + expectFailed map[string]string + }{ + { + name: "returns failed when runtime is not healthy", + runtimeView: makeRuntimeView(cnpgv1.PhaseFirstPrimary, cnpgv1.ManagedRoles{}), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Failed, + expectedReason: reasonManagedRolesFailed, + expectErr: true, + expectStatusPublished: false, + }, + { + name: "returns failed when role is still pending reconciliation", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusPendingReconciliation: {"app_user"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Failed, + expectedReason: reasonManagedRolesFailed, + expectErr: true, + expectStatusPublished: true, + expectPending: []string{"app_user"}, + }, + { + name: "returns failed when role cannot reconcile", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + CannotReconcile: map[string][]string{ + "app_user": {"reserved role"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Failed, + expectedReason: reasonManagedRolesFailed, + expectErr: true, + expectStatusPublished: true, + expectFailed: map[string]string{ + "app_user": "reserved role", + }, + }, + { + name: "returns ready when all desired roles are reconciled", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_user_rw", Exists: true}, + }, + expectedState: pgcConstants.Ready, + expectedReason: reasonManagedRolesReady, + expectErr: false, + expectStatusPublished: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: tt.specRoles, + }, + } + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + noopEventEmitter{}, + nil, + tt.runtimeView, + cluster, + "pg1-secret", + ) + + health, err := model.Converge(context.Background()) + if tt.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + assert.Equal(t, managedRolesReady, health.Condition) + assert.Equal(t, tt.expectedState, health.State) + assert.Equal(t, tt.expectedReason, health.Reason) + if tt.expectStatusPublished { + require.NotNil(t, cluster.Status.ManagedRolesStatus) + assert.Equal(t, tt.expectPending, cluster.Status.ManagedRolesStatus.Pending) + assert.Equal(t, tt.expectFailed, cluster.Status.ManagedRolesStatus.Failed) + } else { + assert.Nil(t, cluster.Status.ManagedRolesStatus) + } + }) + } +} + +func 
TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + t.Run("does not set enabled true while pooler is pending", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + &MergedConfig{}, + nil, + true, + true, + ) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Nil(t, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Pending, health.State) + }) + + t.Run("sets enabled true when pooler converges ready", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + rwPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + roPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler, roPooler).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + &MergedConfig{}, + &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}, + true, + true, + ) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Equal(t, &enterprisev4.ConnectionPoolerStatus{Enabled: true}, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Ready, health.State) + }) + + t.Run("sets status nil when pooler disabled", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{ + ConnectionPoolerStatus: &enterprisev4.ConnectionPoolerStatus{Enabled: true}, + }, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + &MergedConfig{}, + nil, + false, + false, + ) + + require.NoError(t, model.Actuate(context.Background())) + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Nil(t, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Ready, health.State) + }) +} diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 00209d2df..85afc2f93 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -85,19 +85,26 @@ const ( failedClusterPhase reconcileClusterPhases = "Failed" // condition types - clusterReady conditionTypes = "ClusterReady" - poolerReady conditionTypes = "PoolerReady" + clusterReady conditionTypes = "ClusterReady" + poolerReady conditionTypes = "PoolerReady" + managedRolesReady conditionTypes = "ManagedRolesReady" + secretsReady conditionTypes = "SecretsReady" + configMapsReady conditionTypes = "ConfigMapsReady" // condition reasons — clusterReady reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" + 
reasonManagedRolesReady conditionReasons = "ManagedRolesReady" reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" reasonClusterGetFailed conditionReasons = "ClusterGetFailed" reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" + reasonConfigMapReady conditionReasons = "ConfigMapReady" reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" + reasonUserSecretPending conditionReasons = "UserSecretPending" reasonUserSecretFailed conditionReasons = "UserSecretReconciliationFailed" + reasonSuperUserSecretReady conditionReasons = "SuperUserSecretReady" reasonSuperUserSecretFailed conditionReasons = "SuperUserSecretFailed" reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" diff --git a/pkg/postgresql/cluster/core/types/constants/components.go b/pkg/postgresql/cluster/core/types/constants/components.go index a77b88d74..f6dcdfb7b 100644 --- a/pkg/postgresql/cluster/core/types/constants/components.go +++ b/pkg/postgresql/cluster/core/types/constants/components.go @@ -1,8 +1,9 @@ package pgcConstants const ( - ComponentProvisioner = "provisioner" - ComponentPooler = "pooler" - ComponentConfigMap = "configmap" - ComponentSecret = "secret" + ComponentManagedRoles = "managedRoles" + ComponentProvisioner = "provisioner" + ComponentPooler = "pooler" + ComponentConfigMap = "configMap" + ComponentSecret = "secret" ) diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index cbf7c15a9..6511b502f 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -34,7 +34,7 @@ const ( deletionPolicyRetain string = "Retain" deletionPolicyDelete string = "Delete" - + postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer" annotationRetainedFrom string = "enterprise.splunk.com/retained-from" @@ -79,10 +79,10 @@ const ( reasonUsersAvailable conditionReasons = "UsersAvailable" reasonRoleConflict conditionReasons = "RoleConflict" reasonConfigMapsCreationFailed conditionReasons = "ConfigMapsCreationFailed" - reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" - reasonDatabaseReconcileFailed conditionReasons = "DatabaseReconcileFailed" - reasonPrivilegesGranted conditionReasons = "PrivilegesGranted" - reasonPrivilegesGrantFailed conditionReasons = "PrivilegesGrantFailed" + reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" + reasonDatabaseReconcileFailed conditionReasons = "DatabaseReconcileFailed" + reasonPrivilegesGranted conditionReasons = "PrivilegesGranted" + reasonPrivilegesGrantFailed conditionReasons = "PrivilegesGrantFailed" // ClusterReady sentinel values returned by ensureClusterReady. // Exported so the controller adapter can switch on them if needed. 
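The Converge implementations above, and the ones reworked in the follow-up patch below, all publish component health from a deferred block, joining a failed status write onto the convergence error so neither is silently dropped. A standalone sketch of that defer/errors.Join pattern, using illustrative stand-in types rather than the operator's real componentHealth and healthStatusUpdater:

package main

import (
	"errors"
	"fmt"
)

// health is a stand-in for componentHealth; statusWriter for healthStatusUpdater.
type health struct {
	State   string
	Message string
}

type statusWriter func(health) error

// converge builds up h and lets the deferred block publish it exactly once,
// attaching a failed status write to the returned error via errors.Join.
func converge(write statusWriter) (h health, err error) {
	defer func() {
		if write == nil {
			return
		}
		if statusErr := write(h); statusErr != nil {
			err = errors.Join(err, statusErr)
		}
	}()

	h = health{State: "Ready", Message: "component converged"}
	return h, nil
}

func main() {
	failingWriter := func(h health) error {
		return fmt.Errorf("status update conflict while publishing %q", h.State)
	}
	_, err := converge(failingWriter)
	// The join surfaces the status-write failure even though convergence itself succeeded.
	fmt.Println(err)
}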
From 161c57906b812cf561d25a0467d1a3ffeede91d4 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Wed, 15 Apr 2026 17:26:37 +0200 Subject: [PATCH 10/10] review and rebase changes --- .../postgrescluster_controller_test.go | 32 +- pkg/postgresql/cluster/core/cluster.go | 141 ++++---- .../cluster/core/cluster_unit_test.go | 301 ++++++++++++++---- pkg/postgresql/cluster/core/events.go | 1 + pkg/postgresql/cluster/core/types.go | 5 +- 5 files changed, 326 insertions(+), 154 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 0ca89a82d..96ac67987 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -54,12 +54,14 @@ import ( * PC-09 ignores no-op updates */ -func containsEvent(events []string, recorder *record.FakeRecorder, eventType string, event string) bool { +func containsEvents(events *[]string, recorder *record.FakeRecorder, eventType string, event string) bool { for { select { case e := <-recorder.Events: - events = append(events, e) - return strings.Contains(e, eventType) && strings.Contains(e, event) + *events = append(*events, e) + if strings.Contains(e, eventType) && strings.Contains(e, event) { + return true + } default: return false } @@ -248,17 +250,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") Expect(secretCond).NotTo(BeNil()) Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) - Expect(secretCond.Reason).To(Equal("ClusterBuildSucceeded")) + Expect(secretCond.Reason).To(Equal("SuperUserSecretReady")) configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") Expect(configMapCond).NotTo(BeNil()) Expect(configMapCond.Status).To(Equal(metav1.ConditionTrue)) - Expect(configMapCond.Reason).To(Equal("ClusterBuildSucceeded")) + Expect(configMapCond.Reason).To(Equal("ConfigMapReconciled")) managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") Expect(managedRolesCond).NotTo(BeNil()) Expect(managedRolesCond.Status).To(Equal(metav1.ConditionTrue)) - Expect(managedRolesCond.Reason).To(Equal("ManagedRolesReady")) + Expect(managedRolesCond.Reason).To(Equal("ManagedRolesReconciled")) // Pooler is disabled in this suite fixture, but converge publishes PoolerReady=True with disabled message. 
poolerCond := meta.FindStatusCondition(pc.Status.Conditions, "PoolerReady") @@ -281,11 +283,10 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) received := make([]string, 0, 8) - Eventually(func() bool { - return containsEvent( - received, fakeRecorder, - v1.EventTypeNormal, core.EventClusterReady) - }, "5s", "100ms").Should(BeTrue(), "events seen: %v", received) + Expect(containsEvents( + &received, fakeRecorder, + v1.EventTypeNormal, core.EventClusterReady, + )).To(BeTrue(), "events seen: %v", received) }) // PC-07 @@ -381,11 +382,10 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { }, "20s", "250ms").Should(BeTrue()) received := make([]string, 0, 8) - Eventually(func() bool { - return containsEvent( - received, fakeRecorder, - v1.EventTypeWarning, core.EventClusterClassNotFound) - }, "5s", "100ms").Should(BeTrue(), "events seen: %v", received) + Expect(containsEvents( + &received, fakeRecorder, + v1.EventTypeWarning, core.EventClusterClassNotFound, + )).To(BeTrue(), "events seen: %v", received) }) }) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index ffc77cab4..eb146ba77 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -163,14 +163,14 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil secretComponent := newSecretModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, postgresSecretName) - provisionerComponent := newProvisionerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, mergedConfig, postgresSecretName) + clusterComponent := newProvisionerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, mergedConfig, postgresSecretName) - components := []componentModel{ + bootstrapComponents := []component{ secretComponent, - provisionerComponent, + clusterComponent, } - phase := func(component componentModel) (ctrl.Result, error) { + phase := func(component component) (ctrl.Result, error) { componentLogger := logger.WithValues("component", component.Name()) gate, gateErr := component.EvaluatePrerequisites(ctx) if gateErr != nil { @@ -246,7 +246,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, nil } - for _, component := range components { + for _, component := range bootstrapComponents { result, err := phase(component) if err != nil { return result, err @@ -256,10 +256,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } } - cnpgCluster = provisionerComponent.cnpgCluster - runtimeView := provisionerRuntimeView{model: provisionerComponent} - oldConditions := append([]metav1.Condition(nil), postgresCluster.Status.Conditions...) 
- runtimeComponents := []componentModel{ + cnpgCluster = clusterComponent.cnpgCluster + runtimeView := clusterRuntimeViewAdapter{model: clusterComponent} + runtimeComponents := []component{ newManagedRolesModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), @@ -274,8 +273,6 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return result, nil } } - rc.emitPoolerReadyTransition(postgresCluster, oldConditions) - logger.Info("Reconciliation complete") if err := updatePhaseStatus(readyClusterPhase); err != nil { if apierrors.IsConflict(err) { @@ -317,30 +314,10 @@ type componentHealth struct { Result ctrl.Result } -type componentModel interface { - phase -} - -type actuator interface { +type component interface { Actuate(ctx context.Context) error -} - -type converger interface { Converge(ctx context.Context) (componentHealth, error) -} - -type dependencyGated interface { EvaluatePrerequisites(ctx context.Context) (prerequisiteDecision, error) -} - -type phase interface { - actuator - converger - dependencyGated - namer -} - -type namer interface { Name() string } @@ -356,20 +333,25 @@ type eventEmitter interface { emitWarning(obj client.Object, reason, message string) } +type poolerEmitter interface { + eventEmitter + emitPoolerReadyTransition(obj client.Object, conditions []metav1.Condition) +} + type clusterRuntimeView interface { Cluster() *cnpgv1.Cluster IsHealthy() bool } -type provisionerRuntimeView struct { +type clusterRuntimeViewAdapter struct { model *provisionerModel } -func (v provisionerRuntimeView) Cluster() *cnpgv1.Cluster { +func (v clusterRuntimeViewAdapter) Cluster() *cnpgv1.Cluster { return v.model.cnpgCluster } -func (v provisionerRuntimeView) IsHealthy() bool { +func (v clusterRuntimeViewAdapter) IsHealthy() bool { return v.model.cnpgCluster != nil && v.model.cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy } @@ -641,18 +623,25 @@ func newManagedRolesModel(c client.Client, scheme *runtime.Scheme, events eventE func (m *managedRolesModel) Name() string { return pgcConstants.ComponentManagedRoles } -func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { +func (m *managedRolesModel) runtimeGateHealth() (componentHealth, bool) { if m.runtime == nil || !m.runtime.IsHealthy() { + return componentHealth{ + State: pgcConstants.Pending, + Condition: managedRolesReady, + Reason: reasonManagedRolesPending, + Message: "Managed roles blocked until CNPG cluster is healthy", + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, true + } + return componentHealth{}, false +} + +func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if gateHealth, blocked := m.runtimeGateHealth(); blocked { return prerequisiteDecision{ Allowed: false, - Health: componentHealth{ - State: pgcConstants.Failed, - Condition: managedRolesReady, - Reason: reasonManagedRolesFailed, - Message: "Managed roles blocked until CNPG cluster is healthy", - Phase: failedClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, + Health: gateHealth, }, nil } return prerequisiteDecision{Allowed: true}, nil @@ -686,13 +675,9 @@ func (m *managedRolesModel) 
Converge(ctx context.Context) (health componentHealt health = m.health }() - if m.runtime == nil || !m.runtime.IsHealthy() { - m.health.State = pgcConstants.Failed - m.health.Reason = reasonManagedRolesFailed - m.health.Message = "Managed roles blocked until CNPG cluster is healthy" - m.health.Phase = failedClusterPhase - m.health.Result = ctrl.Result{RequeueAfter: retryDelay} - return m.health, fmt.Errorf("managed roles blocked until CNPG cluster is healthy") + if gateHealth, blocked := m.runtimeGateHealth(); blocked { + m.health = gateHealth + return m.health, nil } syncManagedRolesStatusFromCNPG(m.cluster, m.runtime.Cluster()) @@ -703,6 +688,7 @@ func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealt m.health.Message = "Managed roles status not published yet" m.health.Phase = failedClusterPhase m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + m.emitManagedRolesConvergeFailure(m.health.Message) return m.health, fmt.Errorf("managed roles status not published") } @@ -712,16 +698,17 @@ func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealt m.health.Message = fmt.Sprintf("Managed roles reconciliation failed for %d role(s)", len(status.Failed)) m.health.Phase = failedClusterPhase m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + m.emitManagedRolesConvergeFailure(m.health.Message) return m.health, fmt.Errorf("managed roles have failed entries") } if len(status.Pending) > 0 { - m.health.State = pgcConstants.Failed - m.health.Reason = reasonManagedRolesFailed + m.health.State = pgcConstants.Pending + m.health.Reason = reasonManagedRolesPending m.health.Message = fmt.Sprintf("Managed roles pending for %d role(s)", len(status.Pending)) - m.health.Phase = failedClusterPhase + m.health.Phase = pendingClusterPhase m.health.Result = ctrl.Result{RequeueAfter: retryDelay} - return m.health, fmt.Errorf("managed roles have pending entries") + return m.health, nil } m.health.State = pgcConstants.Ready @@ -729,9 +716,23 @@ func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealt m.health.Message = "Managed roles are reconciled" m.health.Phase = readyClusterPhase m.health.Result = ctrl.Result{} + if !meta.IsStatusConditionTrue(m.cluster.Status.Conditions, string(managedRolesReady)) { + m.events.emitNormal(m.cluster, EventManagedRolesReady, m.health.Message) + } return m.health, nil } +func (m *managedRolesModel) emitManagedRolesConvergeFailure(message string) { + cond := meta.FindStatusCondition(m.cluster.Status.Conditions, string(managedRolesReady)) + if cond != nil && + cond.Status == metav1.ConditionFalse && + cond.Reason == string(reasonManagedRolesFailed) && + cond.Message == message { + return + } + m.events.emitWarning(m.cluster, EventManagedRolesFailed, message) +} + // TODO: Ports as access to cnpg originated info to decouple. 
func syncManagedRolesStatusFromCNPG(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) { if cluster == nil || cnpgCluster == nil { @@ -794,7 +795,7 @@ func syncManagedRolesStatusFromCNPG(cluster *enterprisev4.PostgresCluster, cnpgC type poolerModel struct { client client.Client scheme *runtime.Scheme - events eventEmitter + events poolerEmitter updateStatus healthStatusUpdater cluster *enterprisev4.PostgresCluster mergedConfig *MergedConfig @@ -805,7 +806,7 @@ type poolerModel struct { health componentHealth } -func newPoolerModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { +func newPoolerModel(c client.Client, scheme *runtime.Scheme, events poolerEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { return &poolerModel{ client: c, scheme: scheme, @@ -993,6 +994,7 @@ func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err p.health.Message = msgPoolersReady p.health.Phase = readyClusterPhase p.health.Result = ctrl.Result{} + p.events.emitPoolerReadyTransition(p.cluster, p.cluster.Status.Conditions) return p.health, nil } @@ -1015,19 +1017,6 @@ func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmit func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } func (c *configMapModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { - if c.runtime == nil || !c.runtime.IsHealthy() { - return prerequisiteDecision{ - Allowed: false, - Health: componentHealth{ - State: pgcConstants.Provisioning, - Condition: configMapsReady, - Reason: reasonCNPGProvisioning, - Message: msgCNPGPendingCreation, - Phase: provisioningClusterPhase, - Result: ctrl.Result{RequeueAfter: retryDelay}, - }, - }, nil - } return prerequisiteDecision{Allowed: true}, nil } @@ -1137,7 +1126,7 @@ func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, } c.health.State = pgcConstants.Ready - c.health.Reason = reasonClusterBuildSucceeded + c.health.Reason = reasonConfigMapReady c.health.Message = msgAccessConfigMapReady c.health.Phase = readyClusterPhase c.health.Result = ctrl.Result{} @@ -1259,7 +1248,7 @@ func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err } s.health.State = pgcConstants.Ready - s.health.Reason = reasonClusterBuildSucceeded + s.health.Reason = reasonSuperUserSecretReady s.health.Message = msgSuperuserSecretReady s.health.Phase = readyClusterPhase s.health.Result = ctrl.Result{} @@ -1466,14 +1455,6 @@ func isPoolerReady(pooler *cnpgv1.Pooler) bool { return pooler.Status.Instances >= desired } -func poolerInstanceCount(p *cnpgv1.Pooler) (desired, scheduled int32) { - desired = 1 - if p.Spec.Instances != nil { - desired = *p.Spec.Instances - } - return desired, p.Status.Instances -} - // createOrUpdateConnectionPoolers creates RW and RO poolers if they don't exist. 
func createOrUpdateConnectionPoolers(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster) error { if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readWriteEndpoint); err != nil { diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 0fe1724e1..de46e0831 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -26,8 +27,28 @@ type configMapNotFoundClient struct { type noopEventEmitter struct{} -func (noopEventEmitter) emitNormal(_ client.Object, _, _ string) {} -func (noopEventEmitter) emitWarning(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitNormal(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitWarning(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitPoolerReadyTransition(_ client.Object, _ []metav1.Condition) {} + +type captureEventEmitter struct { + normals []string + warnings []string +} + +func (c *captureEventEmitter) emitNormal(_ client.Object, reason, message string) { + c.normals = append(c.normals, reason+":"+message) +} + +func (c *captureEventEmitter) emitWarning(_ client.Object, reason, message string) { + c.warnings = append(c.warnings, reason+":"+message) +} + +func (c *captureEventEmitter) emitPoolerReadyTransition(_ client.Object, conditions []metav1.Condition) { + if !meta.IsStatusConditionTrue(conditions, string(poolerReady)) { + c.normals = append(c.normals, EventPoolerReady+":Connection poolers are ready") + } +} func (c configMapNotFoundClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { if _, ok := obj.(*corev1.ConfigMap); ok { @@ -112,6 +133,46 @@ func TestIsPoolerReady(t *testing.T) { } } +func TestPoolerInstanceCountManual(t *testing.T) { + tests := []struct { + name string + pooler *cnpgv1.Pooler + expectedDesired int32 + expectedScheduled int32 + }{ + { + name: "nil instances defaults desired to 1", + pooler: &cnpgv1.Pooler{ + Status: cnpgv1.PoolerStatus{Instances: 3}, + }, + expectedDesired: 1, + expectedScheduled: 3, + }, + { + name: "explicit instances uses spec value", + pooler: &cnpgv1.Pooler{ + Spec: cnpgv1.PoolerSpec{Instances: ptr.To(int32(5))}, + Status: cnpgv1.PoolerStatus{Instances: 2}, + }, + expectedDesired: 5, + expectedScheduled: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + desired := int32(1) + if tt.pooler.Spec.Instances != nil { + desired = *tt.pooler.Spec.Instances + } + scheduled := tt.pooler.Status.Instances + + assert.Equal(t, tt.expectedDesired, desired) + assert.Equal(t, tt.expectedScheduled, scheduled) + }) + } +} + func TestNormalizeCNPGClusterSpec(t *testing.T) { tests := []struct { name string @@ -1020,42 +1081,6 @@ func TestGenerateConfigMap(t *testing.T) { }) } -func TestPoolerInstanceCount(t *testing.T) { - tests := []struct { - name string - pooler *cnpgv1.Pooler - expectedDesired int32 - expectedScheduled int32 - }{ - { - name: "nil instances defaults desired to 1", - pooler: &cnpgv1.Pooler{ - Status: cnpgv1.PoolerStatus{Instances: 3}, - }, - 
expectedDesired: 1, - expectedScheduled: 3, - }, - { - name: "explicit instances returns spec value", - pooler: &cnpgv1.Pooler{ - Spec: cnpgv1.PoolerSpec{Instances: ptr.To(int32(5))}, - Status: cnpgv1.PoolerStatus{Instances: 2}, - }, - expectedDesired: 5, - expectedScheduled: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - desired, scheduled := poolerInstanceCount(tt.pooler) - - assert.Equal(t, tt.expectedDesired, desired) - assert.Equal(t, tt.expectedScheduled, scheduled) - }) - } -} - func TestGeneratePassword(t *testing.T) { pw, err := generatePassword() @@ -1235,9 +1260,9 @@ func TestComponentStateTriggerConditions(t *testing.T) { makeRuntimeView := func(healthy bool) clusterRuntimeView { if !healthy { - return provisionerRuntimeView{model: &provisionerModel{}} + return clusterRuntimeViewAdapter{model: &provisionerModel{}} } - return provisionerRuntimeView{model: &provisionerModel{ + return clusterRuntimeViewAdapter{model: &provisionerModel{ cnpgCluster: &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, @@ -1248,7 +1273,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { // TODO: as soon as coupling is addressed, remove this monster of a test. combinations := []struct { name string - components []componentModel + components []component conditions []conditionTypes requeue []bool expectAll bool @@ -1256,7 +1281,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { }{ { name: "Provisioner ready, pooler blocked by prerequisites", - components: func() []componentModel { + components: func() []component { cluster := examplePgCluster.DeepCopy() provisioner := makeReadyProvisioner(cluster) pooler := newPoolerModel( @@ -1270,7 +1295,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { true, true, ) - return []componentModel{provisioner, pooler} + return []component{provisioner, pooler} }(), conditions: []conditionTypes{clusterReady, poolerReady}, requeue: []bool{false, true}, @@ -1279,7 +1304,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { }, { name: "Provisioner ready, pooler ready, configMap pending from NotFound", - components: func() []componentModel { + components: func() []component { cluster := examplePgCluster.DeepCopy() provisioner := makeReadyProvisioner(cluster) pooler := newPoolerModel( @@ -1306,7 +1331,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { cluster, "pg1-secret", ) - return []componentModel{provisioner, pooler, configMap} + return []component{provisioner, pooler, configMap} }(), conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady}, requeue: []bool{false, false, true}, @@ -1315,7 +1340,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { }, { name: "Flow successful, all components ready", - components: func() []componentModel { + components: func() []component { cluster := examplePgCluster.DeepCopy() provisioner := makeReadyProvisioner(cluster) pooler := newPoolerModel( @@ -1352,7 +1377,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { cluster, "pg1-secret", ) - return []componentModel{provisioner, pooler, configMap, secret} + return []component{provisioner, pooler, configMap, secret} }(), conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady, secretsReady}, requeue: []bool{false, false, false, false}, @@ -1475,7 +1500,7 @@ func TestManagedRolesModelConverge(t *testing.T) { t.Parallel() makeRuntimeView := func(phase string, managedRoles 
cnpgv1.ManagedRoles) clusterRuntimeView { - return provisionerRuntimeView{model: &provisionerModel{ + return clusterRuntimeViewAdapter{model: &provisionerModel{ cnpgCluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ Phase: phase, @@ -1497,18 +1522,18 @@ func TestManagedRolesModelConverge(t *testing.T) { expectFailed map[string]string }{ { - name: "returns failed when runtime is not healthy", + name: "returns pending when runtime is not healthy", runtimeView: makeRuntimeView(cnpgv1.PhaseFirstPrimary, cnpgv1.ManagedRoles{}), specRoles: []enterprisev4.ManagedRole{ {Name: "app_user", Exists: true}, }, - expectedState: pgcConstants.Failed, - expectedReason: reasonManagedRolesFailed, - expectErr: true, + expectedState: pgcConstants.Pending, + expectedReason: reasonManagedRolesPending, + expectErr: false, expectStatusPublished: false, }, { - name: "returns failed when role is still pending reconciliation", + name: "returns pending when role is still pending reconciliation", runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ ByStatus: map[cnpgv1.RoleStatus][]string{ cnpgv1.RoleStatusPendingReconciliation: {"app_user"}, @@ -1517,9 +1542,9 @@ func TestManagedRolesModelConverge(t *testing.T) { specRoles: []enterprisev4.ManagedRole{ {Name: "app_user", Exists: true}, }, - expectedState: pgcConstants.Failed, - expectedReason: reasonManagedRolesFailed, - expectErr: true, + expectedState: pgcConstants.Pending, + expectedReason: reasonManagedRolesPending, + expectErr: false, expectStatusPublished: true, expectPending: []string{"app_user"}, }, @@ -1600,6 +1625,37 @@ func TestManagedRolesModelConverge(t *testing.T) { } } +func TestManagedRolesRuntimeGateHealthMatchesConverge(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + }, + } + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + noopEventEmitter{}, + nil, + clusterRuntimeViewAdapter{model: &provisionerModel{ + cnpgCluster: &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseFirstPrimary}}, + }}, + cluster, + "pg1-secret", + ) + + gate, err := model.EvaluatePrerequisites(context.Background()) + require.NoError(t, err) + require.False(t, gate.Allowed) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Equal(t, gate.Health, health) +} + func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { t.Parallel() @@ -1698,3 +1754,136 @@ func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { assert.Equal(t, pgcConstants.Ready, health.State) }) } + +func TestPoolerConvergeEmitsReadyEventOnTransition(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + events := &captureEventEmitter{} + rwPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + roPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + model := 
newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler, roPooler).Build(), + scheme, + events, + nil, + cluster, + &MergedConfig{}, + &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}, + true, + true, + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + require.NotEmpty(t, events.normals) + assert.Contains(t, events.normals[0], EventPoolerReady) + + // No re-emission when condition already True. + cluster.Status.Conditions = []metav1.Condition{{ + Type: string(poolerReady), + Status: metav1.ConditionTrue, + }} + events.normals = nil + _, err = model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.normals) +} + +func TestManagedRolesConvergeDoesNotEmitFailureForPending(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{{Name: "app_user", Exists: true}}, + }, + } + events := &captureEventEmitter{} + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + events, + nil, + clusterRuntimeViewAdapter{model: &provisionerModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + ManagedRolesStatus: cnpgv1.ManagedRoles{}, + }, + }, + }}, + cluster, + "pg1-secret", + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.warnings) +} + +func TestManagedRolesConvergeEmitsReadyEventOnTransition(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + }, + } + events := &captureEventEmitter{} + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + events, + nil, + clusterRuntimeViewAdapter{model: &provisionerModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + ManagedRolesStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user"}, + }, + }, + }, + }, + }}, + cluster, + "pg1-secret", + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + require.NotEmpty(t, events.normals) + assert.Contains(t, events.normals[0], EventManagedRolesReady) + + // No re-emission when condition already True. 
+ cluster.Status.Conditions = []metav1.Condition{{ + Type: string(managedRolesReady), + Status: metav1.ConditionTrue, + }} + events.normals = nil + _, err = model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.normals) +} diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index afcfd768e..c6d7d58e0 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -25,6 +25,7 @@ const ( EventClusterCreateFailed = "ClusterCreateFailed" EventClusterUpdateFailed = "ClusterUpdateFailed" EventManagedRolesFailed = "ManagedRolesFailed" + EventManagedRolesReady = "ManagedRolesReady" EventPoolerReconcileFailed = "PoolerReconcileFailed" EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" EventClusterDegraded = "ClusterDegraded" diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 85afc2f93..0a18c9c86 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -93,14 +93,15 @@ const ( // condition reasons — clusterReady reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" - reasonManagedRolesReady conditionReasons = "ManagedRolesReady" + reasonManagedRolesReady conditionReasons = "ManagedRolesReconciled" + reasonManagedRolesPending conditionReasons = "ManagedRolesPending" reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" reasonClusterGetFailed conditionReasons = "ClusterGetFailed" reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" - reasonConfigMapReady conditionReasons = "ConfigMapReady" + reasonConfigMapReady conditionReasons = "ConfigMapReconciled" reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" reasonUserSecretPending conditionReasons = "UserSecretPending" reasonUserSecretFailed conditionReasons = "UserSecretReconciliationFailed"
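
For reviewers reading the reworked test helper above: the patch changes containsEvent into containsEvents, which drains the FakeRecorder channel into a caller-owned slice instead of returning on the first received event. A self-contained restatement of that pattern (toy main added here for illustration; the real helper lives in postgrescluster_controller_test.go) shows why the slice is passed by pointer, namely so the caller can print everything that was observed when the assertion fails:

package main

import (
	"fmt"
	"strings"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/record"
)

// containsEvents drains the recorder's buffered channel into *events and
// returns true as soon as one drained event mentions both the event type and
// the reason. Unmatched events stay recorded in *events for the failure message.
func containsEvents(events *[]string, recorder *record.FakeRecorder, eventType, reason string) bool {
	for {
		select {
		case e := <-recorder.Events:
			*events = append(*events, e)
			if strings.Contains(e, eventType) && strings.Contains(e, reason) {
				return true
			}
		default:
			// Channel drained without a match.
			return false
		}
	}
}

func main() {
	recorder := record.NewFakeRecorder(8)
	recorder.Event(&corev1.Pod{}, "Normal", "ClusterReady", "cluster is ready")

	seen := make([]string, 0, 8)
	fmt.Println(containsEvents(&seen, recorder, "Normal", "ClusterReady"), seen)
}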
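
The managedRolesModel changes hinge on runtimeGateHealth being the single source of truth for the "CNPG cluster not healthy yet" outcome, so EvaluatePrerequisites and Converge can no longer report different states (TestManagedRolesRuntimeGateHealthMatchesConverge asserts exactly that). A minimal sketch of the shape of that pattern, using illustrative stand-in types rather than the real pkg/postgresql/cluster/core ones:

package main

import (
	"fmt"
	"time"
)

// Toy stand-ins for the operator's component health plumbing; names are
// illustrative, not the real core types.
type health struct {
	State   string
	Reason  string
	Message string
	Requeue time.Duration
}

type gateDecision struct {
	Allowed bool
	Health  health
}

type rolesComponent struct {
	clusterHealthy bool
}

// runtimeGateHealth is the one place that builds the Pending outcome, so the
// prerequisite gate and Converge cannot drift apart.
func (c *rolesComponent) runtimeGateHealth() (health, bool) {
	if c.clusterHealthy {
		return health{}, false
	}
	return health{
		State:   "Pending",
		Reason:  "ManagedRolesPending",
		Message: "Managed roles blocked until CNPG cluster is healthy",
		Requeue: 30 * time.Second,
	}, true
}

func (c *rolesComponent) EvaluatePrerequisites() gateDecision {
	if h, blocked := c.runtimeGateHealth(); blocked {
		return gateDecision{Allowed: false, Health: h}
	}
	return gateDecision{Allowed: true}
}

func (c *rolesComponent) Converge() (health, error) {
	if h, blocked := c.runtimeGateHealth(); blocked {
		// Waiting is not an error: requeue without failing the reconcile.
		return h, nil
	}
	return health{State: "Ready", Reason: "ManagedRolesReconciled"}, nil
}

func main() {
	c := &rolesComponent{clusterHealthy: false}
	gate := c.EvaluatePrerequisites()
	converged, _ := c.Converge()
	fmt.Println(gate.Health == converged) // true: both paths report the same Pending health
}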
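
The new event emissions (EventManagedRolesReady, the failure warning, and the pooler ready transition) are all deduplicated against the currently recorded condition, so a steady-state reconcile stays quiet. A short sketch of the two checks the patch uses, built on the apimachinery condition helpers; the condition type and reason strings below are only examples:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// emitReadyOnTransition fires the callback only when the condition is not
// already True: one event per transition, not one per reconcile.
func emitReadyOnTransition(conditions []metav1.Condition, condType string, emit func()) {
	if meta.IsStatusConditionTrue(conditions, condType) {
		return
	}
	emit()
}

// emitFailureOnChange skips the warning when an identical False condition
// (same reason and message) is already recorded on the object.
func emitFailureOnChange(conditions []metav1.Condition, condType, reason, message string, emit func()) {
	cond := meta.FindStatusCondition(conditions, condType)
	if cond != nil && cond.Status == metav1.ConditionFalse &&
		cond.Reason == reason && cond.Message == message {
		return
	}
	emit()
}

func main() {
	conds := []metav1.Condition{{Type: "ManagedRolesReady", Status: metav1.ConditionTrue}}
	emitReadyOnTransition(conds, "ManagedRolesReady", func() { fmt.Println("emitted") }) // silent
	emitReadyOnTransition(nil, "ManagedRolesReady", func() { fmt.Println("emitted") })   // prints
	emitFailureOnChange(nil, "ManagedRolesReady", "ManagedRolesReconciliationFailed",
		"Managed roles reconciliation failed for 1 role(s)", func() { fmt.Println("warned") })
}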
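
Finally, the removed poolerInstanceCount helper only encoded a defaulting rule that the test now inlines: a nil Spec.Instances counts as one desired instance, and isPoolerReady keeps comparing scheduled against desired. A dependency-free sketch of that rule on plain int32 values (the real code reads the cnpgv1.Pooler spec and status fields):

package main

import "fmt"

// desiredPoolerInstances mirrors the defaulting the removed helper encoded:
// a nil Spec.Instances means one pooler instance.
func desiredPoolerInstances(specInstances *int32) int32 {
	if specInstances == nil {
		return 1
	}
	return *specInstances
}

// poolerReady is the readiness comparison isPoolerReady still performs:
// ready once the scheduled instance count reaches the desired count.
func poolerReady(specInstances *int32, statusInstances int32) bool {
	return statusInstances >= desiredPoolerInstances(specInstances)
}

func main() {
	five := int32(5)
	fmt.Println(poolerReady(nil, 1))   // true: desired defaults to 1
	fmt.Println(poolerReady(&five, 2)) // false: only 2 of 5 instances scheduled
}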