Skip to content

Commit d710f58

Browse files
committed
Update monitoring reconciliation logic, tests
1 parent 59428ae commit d710f58

10 files changed

Lines changed: 302 additions & 172 deletions

File tree

api/v4/postgrescluster_types.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,23 +109,23 @@ type PostgresClusterSpec struct {
109109
// +optional
110110
ClusterDeletionPolicy *string `json:"clusterDeletionPolicy,omitempty"`
111111

112-
// Observability contains configuration for metrics exposure features.
112+
// Monitoring contains configuration for metrics exposure features.
113113
// +optional
114-
Observability *PostgresObservabilityOverride `json:"observability,omitempty"`
114+
Monitoring *PostgresClusterMonitoring `json:"monitoring,omitempty"`
115115
}
116116

117-
// PostgresObservabilityOverride overrides observability configuration options for PostgresClusterClass.
118-
type PostgresObservabilityOverride struct {
117+
// PostgresClusterMonitoring overrides monitoring configuration options for PostgresClusterClass.
118+
type PostgresClusterMonitoring struct {
119119

120120
// +optional
121-
PostgreSQL *FeatureDisableOverride `json:"postgresql,omitempty"`
121+
PostgreSQLMetrics *FeatureDisableOverride `json:"postgresqlMetrics,omitempty"`
122122

123123
// +optional
124-
PgBouncer *FeatureDisableOverride `json:"pgbouncer,omitempty"`
124+
ConnectionPoolerMetrics *FeatureDisableOverride `json:"connectionPoolerMetrics,omitempty"`
125125
}
126126

127127
type FeatureDisableOverride struct {
128-
// Disable set to true will disable the feature even if it's enabled in the class.
128+
// Disabled set to true will disable the feature even if it's enabled in the class.
129129
// +kubebuilder:default=false
130130
// +optional
131131
Disabled *bool `json:"disabled,omitempty"`

api/v4/postgresclusterclass_types.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ type PostgresClusterClassSpec struct {
4949
CNPG *CNPGConfig `json:"cnpg,omitempty"`
5050
}
5151

52+
// +kubebuilder:validation:XValidation:rule="!has(self.monitoring) || !has(self.monitoring.connectionPoolerMetrics) || !has(self.monitoring.connectionPoolerMetrics.enabled) || !self.monitoring.connectionPoolerMetrics.enabled || (has(self.connectionPoolerEnabled) && self.connectionPoolerEnabled)",message="connectionPoolerEnabled must be true when monitoring.connectionPoolerMetrics.enabled is true"
5253
// PostgresClusterClassConfig contains provider-agnostic cluster configuration.
5354
// These fields define PostgresCluster infrastructure and can be overridden in PostgresCluster CR.
5455
type PostgresClusterClassConfig struct {
@@ -100,12 +101,12 @@ type PostgresClusterClassConfig struct {
100101
// +optional
101102
ConnectionPoolerEnabled *bool `json:"connectionPoolerEnabled,omitempty"`
102103

103-
// Observability contains configuration for metrics exposure.
104+
// Monitoring contains configuration for metrics exposure.
104105
// When enabled, creates metrics resources for clusters using this class.
105106
// Can be overridden in PostgresCluster CR.
106107
// +kubebuilder:default={}
107108
// +optional
108-
Observability *PostgresObservabilityClassConfig `json:"observability,omitempty"`
109+
Monitoring *PostgresMonitoringClassConfig `json:"monitoring,omitempty"`
109110
}
110111

111112
// ConnectionPoolerMode defines the PgBouncer connection pooling strategy.
@@ -179,11 +180,11 @@ type PostgresClusterClassStatus struct {
179180
Phase *string `json:"phase,omitempty"`
180181
}
181182

182-
type PostgresObservabilityClassConfig struct {
183+
type PostgresMonitoringClassConfig struct {
183184
// +optional
184-
PostgreSQL *MetricsClassConfig `json:"postgresql,omitempty"`
185+
PostgreSQLMetrics *MetricsClassConfig `json:"postgresqlMetrics,omitempty"`
185186
// +optional
186-
PgBouncer *MetricsClassConfig `json:"pgbouncer,omitempty"`
187+
ConnectionPoolerMetrics *MetricsClassConfig `json:"connectionPoolerMetrics,omitempty"`
187188
}
188189

189190
type MetricsClassConfig struct {

internal/controller/postgrescluster_controller_test.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,17 @@ import (
2828
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
2929
. "github.com/onsi/ginkgo/v2"
3030
. "github.com/onsi/gomega"
31+
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
3132
"k8s.io/apimachinery/pkg/types"
3233
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3334
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3435

3536
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
37+
"k8s.io/utils/ptr"
3638

3739
enterprisev4 "github.com/splunk/splunk-operator/api/v4"
3840
"github.com/splunk/splunk-operator/pkg/postgresql/cluster/core"
41+
corev1 "k8s.io/api/core/v1"
3942
)
4043

4144
/*
@@ -85,6 +88,28 @@ var _ = Describe("PostgresCluster Controller", func() {
8588
}
8689
}
8790

91+
recreateClassWithMonitoring := func(postgresMetricsEnabled bool) {
92+
Expect(k8sClient.Delete(ctx, pgClusterClass)).To(Succeed())
93+
94+
pgClusterClass = &enterprisev4.PostgresClusterClass{
95+
ObjectMeta: metav1.ObjectMeta{Name: className},
96+
Spec: enterprisev4.PostgresClusterClassSpec{
97+
Provisioner: provisioner,
98+
Config: &enterprisev4.PostgresClusterClassConfig{
99+
Instances: &[]int32{clusterMemberCount}[0],
100+
Storage: &[]resource.Quantity{resource.MustParse(storageAmount)}[0],
101+
PostgresVersion: &[]string{postgresVersion}[0],
102+
ConnectionPoolerEnabled: &[]bool{poolerEnabled}[0],
103+
Monitoring: &enterprisev4.PostgresMonitoringClassConfig{
104+
PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: ptr.To(postgresMetricsEnabled)},
105+
},
106+
},
107+
},
108+
}
109+
110+
Expect(k8sClient.Create(ctx, pgClusterClass)).To(Succeed())
111+
}
112+
88113
BeforeEach(func() {
89114
nameSuffix := fmt.Sprintf("%d-%d-%d",
90115
GinkgoParallelProcess(),
@@ -238,6 +263,65 @@ var _ = Describe("PostgresCluster Controller", func() {
238263
Expect(cond).NotTo(BeNil())
239264
Expect(cond.ObservedGeneration).To(Equal(pc.Generation))
240265
})
266+
267+
It("creates monitoring resources and sets MonitoringReady when monitoring is enabled", func() {
268+
recreateClassWithMonitoring(true)
269+
270+
Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed())
271+
reconcileNTimes(3)
272+
273+
pc := &enterprisev4.PostgresCluster{}
274+
Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed())
275+
cond := meta.FindStatusCondition(pc.Status.Conditions, "MonitoringReady")
276+
Expect(cond).NotTo(BeNil())
277+
Expect(cond.Status).To(Equal(metav1.ConditionTrue))
278+
Expect(cond.Reason).To(Equal("ObservabilityResourcesReady"))
279+
280+
metricsService := &corev1.Service{}
281+
Expect(k8sClient.Get(ctx, types.NamespacedName{
282+
Name: clusterName + "-postgres-metrics",
283+
Namespace: namespace,
284+
}, metricsService)).To(Succeed())
285+
286+
serviceMonitor := &monitoringv1.ServiceMonitor{}
287+
Expect(k8sClient.Get(ctx, types.NamespacedName{
288+
Name: clusterName + "-postgres-metrics-monitor",
289+
Namespace: namespace,
290+
}, serviceMonitor)).To(Succeed())
291+
})
292+
293+
It("removes monitoring resources and MonitoringReady when monitoring is disabled by cluster override", func() {
294+
recreateClassWithMonitoring(true)
295+
296+
Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed())
297+
reconcileNTimes(3)
298+
299+
current := &enterprisev4.PostgresCluster{}
300+
Expect(k8sClient.Get(ctx, pgClusterKey, current)).To(Succeed())
301+
current.Spec.Monitoring = &enterprisev4.PostgresClusterMonitoring{
302+
PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)},
303+
}
304+
Expect(k8sClient.Update(ctx, current)).To(Succeed())
305+
306+
reconcileNTimes(1)
307+
308+
Expect(k8sClient.Get(ctx, pgClusterKey, current)).To(Succeed())
309+
Expect(meta.FindStatusCondition(current.Status.Conditions, "MonitoringReady")).To(BeNil())
310+
311+
metricsService := &corev1.Service{}
312+
err := k8sClient.Get(ctx, types.NamespacedName{
313+
Name: clusterName + "-postgres-metrics",
314+
Namespace: namespace,
315+
}, metricsService)
316+
Expect(apierrors.IsNotFound(err)).To(BeTrue())
317+
318+
serviceMonitor := &monitoringv1.ServiceMonitor{}
319+
err = k8sClient.Get(ctx, types.NamespacedName{
320+
Name: clusterName + "-postgres-metrics-monitor",
321+
Namespace: namespace,
322+
}, serviceMonitor)
323+
Expect(apierrors.IsNotFound(err)).To(BeTrue())
324+
})
241325
})
242326
})
243327

internal/controller/suite_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ var _ = BeforeSuite(func(ctx context.Context) {
7878
CRDDirectoryPaths: []string{
7979
filepath.Join("..", "..", "config", "crd", "bases"),
8080
filepath.Join(cnpgModuleDir, "config", "crd", "bases"),
81+
filepath.Join("testdata", "crds"),
8182
},
8283
ErrorIfCRDPathMissing: true,
8384
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
apiVersion: apiextensions.k8s.io/v1
2+
kind: CustomResourceDefinition
3+
metadata:
4+
name: servicemonitors.monitoring.coreos.com
5+
spec:
6+
group: monitoring.coreos.com
7+
scope: Namespaced
8+
names:
9+
plural: servicemonitors
10+
singular: servicemonitor
11+
kind: ServiceMonitor
12+
listKind: ServiceMonitorList
13+
versions:
14+
- name: v1
15+
served: true
16+
storage: true
17+
schema:
18+
openAPIV3Schema:
19+
type: object
20+
properties:
21+
apiVersion:
22+
type: string
23+
kind:
24+
type: string
25+
metadata:
26+
type: object
27+
spec:
28+
type: object
29+
x-kubernetes-preserve-unknown-fields: true
30+
status:
31+
type: object
32+
x-kubernetes-preserve-unknown-fields: true

pkg/postgresql/cluster/core/cluster.go

Lines changed: 84 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -396,35 +396,54 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
396396
rc.emitPoolerReadyTransition(postgresCluster, oldConditions)
397397
}
398398

399-
if err := reconcilePostgreSQLMetricsService(ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass)); err != nil {
400-
return ctrl.Result{}, err
399+
postgresMetricsEnabled := isPostgreSQLMetricsEnabled(postgresCluster, clusterClass)
400+
poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass)
401+
rwPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && rwPoolerExists
402+
roPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && roPoolerExists
403+
monitoringEnabled := postgresMetricsEnabled || (poolerMetricsEnabled && poolerEnabled)
404+
405+
monitoringFailure := func(reason conditionReasons, eventReason, message string, err error) (ctrl.Result, error) {
406+
return ctrl.Result{}, handleMonitoringFailure(ctx, c, rc, postgresCluster, reason, eventReason, message, err)
401407
}
402408

403-
poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass)
404-
rwPoolerMetricsEnabled := poolerMetricsEnabled && rwPoolerExists
405-
roPoolerMetricsEnabled := poolerMetricsEnabled && roPoolerExists
409+
oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions))
410+
copy(oldConditions, postgresCluster.Status.Conditions)
411+
412+
if err := reconcilePostgreSQLMetricsService(ctx, c, rc.Scheme, postgresCluster, postgresMetricsEnabled); err != nil {
413+
return monitoringFailure(reasonPostgresMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile PostgreSQL metrics Service: %v", err), err)
414+
}
406415
if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil {
407-
return ctrl.Result{}, err
416+
return monitoringFailure(reasonPoolerMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile RW pooler metrics Service: %v", err), err)
408417
}
409418
if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil {
410-
return ctrl.Result{}, err
419+
return monitoringFailure(reasonPoolerMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile RO pooler metrics Service: %v", err), err)
411420
}
412-
413-
if err := reconcilePostgreSQLMetricsServiceMonitor(
414-
ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass),
415-
); err != nil {
416-
return ctrl.Result{}, err
421+
if err := reconcilePostgreSQLMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, postgresMetricsEnabled); err != nil {
422+
return monitoringFailure(reasonPostgresMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile PostgreSQL metrics ServiceMonitor: %v", err), err)
417423
}
418-
419-
if err := reconcileConnectionPoolerMetricsServiceMonitor(
420-
ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled,
421-
); err != nil {
422-
return ctrl.Result{}, err
424+
if err := reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil {
425+
return monitoringFailure(reasonPoolerMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile RW pooler metrics ServiceMonitor: %v", err), err)
423426
}
424-
if err := reconcileConnectionPoolerMetricsServiceMonitor(
425-
ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled,
426-
); err != nil {
427-
return ctrl.Result{}, err
427+
if err := reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil {
428+
return monitoringFailure(reasonPoolerMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile RO pooler metrics ServiceMonitor: %v", err), err)
429+
}
430+
431+
if !monitoringEnabled {
432+
if err := removeCondition(ctx, c, postgresCluster, monitoringReady); err != nil {
433+
if apierrors.IsConflict(err) {
434+
return ctrl.Result{Requeue: true}, nil
435+
}
436+
return ctrl.Result{}, err
437+
}
438+
} else {
439+
if err := setCondition(ctx, c, postgresCluster, monitoringReady, metav1.ConditionTrue, reasonObservabilityResourcesReady, "Monitoring resources are ready"); err != nil {
440+
if apierrors.IsConflict(err) {
441+
return ctrl.Result{Requeue: true}, nil
442+
}
443+
return ctrl.Result{}, err
444+
}
445+
446+
rc.emitMonitoringReadyTransition(postgresCluster, oldConditions)
428447
}
429448

430449
// Reconcile ConfigMap when CNPG cluster is healthy.
@@ -896,6 +915,50 @@ func setStatus(ctx context.Context, c client.Client, cluster *enterprisev4.Postg
896915
return nil
897916
}
898917

918+
// setCondition updates a specific condition on the PostgresCluster status.
919+
func setCondition(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string) error {
920+
base := cluster.Status.DeepCopy()
921+
922+
meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{
923+
Type: string(condType),
924+
Status: status,
925+
Reason: string(reason),
926+
Message: message,
927+
ObservedGeneration: cluster.Generation,
928+
})
929+
930+
if equality.Semantic.DeepEqual(*base, cluster.Status) {
931+
return nil
932+
}
933+
if err := c.Status().Update(ctx, cluster); err != nil {
934+
return fmt.Errorf("failed to update PostgresCluster condition: %w", err)
935+
}
936+
return nil
937+
}
938+
939+
// removeCondition removes a specific condition from the PostgresCluster status.
940+
func removeCondition(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes) error {
941+
base := cluster.Status.DeepCopy()
942+
943+
meta.RemoveStatusCondition(&cluster.Status.Conditions, string(condType))
944+
945+
if equality.Semantic.DeepEqual(*base, cluster.Status) {
946+
return nil
947+
}
948+
if err := c.Status().Update(ctx, cluster); err != nil {
949+
return fmt.Errorf("failed to remove PostgresCluster condition: %w", err)
950+
}
951+
return nil
952+
}
953+
954+
func handleMonitoringFailure(ctx context.Context, c client.Client, rc *ReconcileContext, cluster *enterprisev4.PostgresCluster, reason conditionReasons, eventReason string, message string, err error) error {
955+
rc.emitWarning(cluster, eventReason, message)
956+
if statusErr := setCondition(ctx, c, cluster, monitoringReady, metav1.ConditionFalse, reason, message); statusErr != nil {
957+
return errors.Join(err, fmt.Errorf("failed to update MonitoringReady condition: %w", statusErr))
958+
}
959+
return err
960+
}
961+
899962
// generateConfigMap builds a ConfigMap with connection details for the PostgresCluster.
900963
func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, secretName string) (*corev1.ConfigMap, error) {
901964
cmName := fmt.Sprintf("%s%s", cluster.Name, defaultConfigMapSuffix)

0 commit comments

Comments
 (0)