@@ -396,35 +396,54 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
396396 rc .emitPoolerReadyTransition (postgresCluster , oldConditions )
397397 }
398398
399- if err := reconcilePostgreSQLMetricsService (ctx , c , rc .Scheme , postgresCluster , isPostgreSQLMetricsEnabled (postgresCluster , clusterClass )); err != nil {
400- return ctrl.Result {}, err
399+ postgresMetricsEnabled := isPostgreSQLMetricsEnabled (postgresCluster , clusterClass )
400+ poolerMetricsEnabled := isConnectionPoolerMetricsEnabled (postgresCluster , clusterClass )
401+ rwPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && rwPoolerExists
402+ roPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && roPoolerExists
403+ monitoringEnabled := postgresMetricsEnabled || (poolerMetricsEnabled && poolerEnabled )
404+
405+ monitoringFailure := func (reason conditionReasons , eventReason , message string , err error ) (ctrl.Result , error ) {
406+ return ctrl.Result {}, handleMonitoringFailure (ctx , c , rc , postgresCluster , reason , eventReason , message , err )
401407 }
402408
403- poolerMetricsEnabled := isConnectionPoolerMetricsEnabled (postgresCluster , clusterClass )
404- rwPoolerMetricsEnabled := poolerMetricsEnabled && rwPoolerExists
405- roPoolerMetricsEnabled := poolerMetricsEnabled && roPoolerExists
409+ oldConditions := make ([]metav1.Condition , len (postgresCluster .Status .Conditions ))
410+ copy (oldConditions , postgresCluster .Status .Conditions )
411+
412+ if err := reconcilePostgreSQLMetricsService (ctx , c , rc .Scheme , postgresCluster , postgresMetricsEnabled ); err != nil {
413+ return monitoringFailure (reasonPostgresMetricsServiceFailed , EventMetricsServiceReconcileFailed , fmt .Sprintf ("Failed to reconcile PostgreSQL metrics Service: %v" , err ), err )
414+ }
406415 if err := reconcileConnectionPoolerMetricsService (ctx , c , rc .Scheme , postgresCluster , readWriteEndpoint , rwPoolerMetricsEnabled ); err != nil {
407- return ctrl. Result {} , err
416+ return monitoringFailure ( reasonPoolerMetricsServiceFailed , EventMetricsServiceReconcileFailed , fmt . Sprintf ( "Failed to reconcile RW pooler metrics Service: %v" , err ), err )
408417 }
409418 if err := reconcileConnectionPoolerMetricsService (ctx , c , rc .Scheme , postgresCluster , readOnlyEndpoint , roPoolerMetricsEnabled ); err != nil {
410- return ctrl. Result {} , err
419+ return monitoringFailure ( reasonPoolerMetricsServiceFailed , EventMetricsServiceReconcileFailed , fmt . Sprintf ( "Failed to reconcile RO pooler metrics Service: %v" , err ), err )
411420 }
412-
413- if err := reconcilePostgreSQLMetricsServiceMonitor (
414- ctx , c , rc .Scheme , postgresCluster , isPostgreSQLMetricsEnabled (postgresCluster , clusterClass ),
415- ); err != nil {
416- return ctrl.Result {}, err
421+ if err := reconcilePostgreSQLMetricsServiceMonitor (ctx , c , rc .Scheme , postgresCluster , postgresMetricsEnabled ); err != nil {
422+ return monitoringFailure (reasonPostgresMetricsMonitorFailed , EventServiceMonitorReconcileFailed , fmt .Sprintf ("Failed to reconcile PostgreSQL metrics ServiceMonitor: %v" , err ), err )
417423 }
418-
419- if err := reconcileConnectionPoolerMetricsServiceMonitor (
420- ctx , c , rc .Scheme , postgresCluster , readWriteEndpoint , rwPoolerMetricsEnabled ,
421- ); err != nil {
422- return ctrl.Result {}, err
424+ if err := reconcileConnectionPoolerMetricsServiceMonitor (ctx , c , rc .Scheme , postgresCluster , readWriteEndpoint , rwPoolerMetricsEnabled ); err != nil {
425+ return monitoringFailure (reasonPoolerMetricsMonitorFailed , EventServiceMonitorReconcileFailed , fmt .Sprintf ("Failed to reconcile RW pooler metrics ServiceMonitor: %v" , err ), err )
423426 }
424- if err := reconcileConnectionPoolerMetricsServiceMonitor (
425- ctx , c , rc .Scheme , postgresCluster , readOnlyEndpoint , roPoolerMetricsEnabled ,
426- ); err != nil {
427- return ctrl.Result {}, err
427+ if err := reconcileConnectionPoolerMetricsServiceMonitor (ctx , c , rc .Scheme , postgresCluster , readOnlyEndpoint , roPoolerMetricsEnabled ); err != nil {
428+ return monitoringFailure (reasonPoolerMetricsMonitorFailed , EventServiceMonitorReconcileFailed , fmt .Sprintf ("Failed to reconcile RO pooler metrics ServiceMonitor: %v" , err ), err )
429+ }
430+
431+ if ! monitoringEnabled {
432+ if err := removeCondition (ctx , c , postgresCluster , monitoringReady ); err != nil {
433+ if apierrors .IsConflict (err ) {
434+ return ctrl.Result {Requeue : true }, nil
435+ }
436+ return ctrl.Result {}, err
437+ }
438+ } else {
439+ if err := setCondition (ctx , c , postgresCluster , monitoringReady , metav1 .ConditionTrue , reasonObservabilityResourcesReady , "Monitoring resources are ready" ); err != nil {
440+ if apierrors .IsConflict (err ) {
441+ return ctrl.Result {Requeue : true }, nil
442+ }
443+ return ctrl.Result {}, err
444+ }
445+
446+ rc .emitMonitoringReadyTransition (postgresCluster , oldConditions )
428447 }
429448
430449 // Reconcile ConfigMap when CNPG cluster is healthy.
@@ -896,6 +915,50 @@ func setStatus(ctx context.Context, c client.Client, cluster *enterprisev4.Postg
896915 return nil
897916}
898917
918+ // setCondition updates a specific condition on the PostgresCluster status.
919+ func setCondition (ctx context.Context , c client.Client , cluster * enterprisev4.PostgresCluster , condType conditionTypes , status metav1.ConditionStatus , reason conditionReasons , message string ) error {
920+ base := cluster .Status .DeepCopy ()
921+
922+ meta .SetStatusCondition (& cluster .Status .Conditions , metav1.Condition {
923+ Type : string (condType ),
924+ Status : status ,
925+ Reason : string (reason ),
926+ Message : message ,
927+ ObservedGeneration : cluster .Generation ,
928+ })
929+
930+ if equality .Semantic .DeepEqual (* base , cluster .Status ) {
931+ return nil
932+ }
933+ if err := c .Status ().Update (ctx , cluster ); err != nil {
934+ return fmt .Errorf ("failed to update PostgresCluster condition: %w" , err )
935+ }
936+ return nil
937+ }
938+
939+ // removeCondition removes a specific condition from the PostgresCluster status.
940+ func removeCondition (ctx context.Context , c client.Client , cluster * enterprisev4.PostgresCluster , condType conditionTypes ) error {
941+ base := cluster .Status .DeepCopy ()
942+
943+ meta .RemoveStatusCondition (& cluster .Status .Conditions , string (condType ))
944+
945+ if equality .Semantic .DeepEqual (* base , cluster .Status ) {
946+ return nil
947+ }
948+ if err := c .Status ().Update (ctx , cluster ); err != nil {
949+ return fmt .Errorf ("failed to remove PostgresCluster condition: %w" , err )
950+ }
951+ return nil
952+ }
953+
954+ func handleMonitoringFailure (ctx context.Context , c client.Client , rc * ReconcileContext , cluster * enterprisev4.PostgresCluster , reason conditionReasons , eventReason string , message string , err error ) error {
955+ rc .emitWarning (cluster , eventReason , message )
956+ if statusErr := setCondition (ctx , c , cluster , monitoringReady , metav1 .ConditionFalse , reason , message ); statusErr != nil {
957+ return errors .Join (err , fmt .Errorf ("failed to update MonitoringReady condition: %w" , statusErr ))
958+ }
959+ return err
960+ }
961+
899962// generateConfigMap builds a ConfigMap with connection details for the PostgresCluster.
900963func generateConfigMap (ctx context.Context , c client.Client , scheme * runtime.Scheme , cluster * enterprisev4.PostgresCluster , cnpgCluster * cnpgv1.Cluster , secretName string ) (* corev1.ConfigMap , error ) {
901964 cmName := fmt .Sprintf ("%s%s" , cluster .Name , defaultConfigMapSuffix )
0 commit comments