From 7a43649877f5be3acfc78d1174181b8a75bba073 Mon Sep 17 00:00:00 2001 From: Honi Sanders Date: Mon, 4 May 2026 20:43:12 -0400 Subject: [PATCH] fix: handle NULL pg_stat_wal.stats_reset The stats_reset column of pg_stat_wal can be NULL on instances that have never had WAL stats initialized -- most commonly replicas (which do not write WAL stats locally) and primaries promoted from a replica that was bootstrapped via pg_basebackup. The exporter scanned this column into a plain string, so on such instances every collection failed with "sql: Scan error on column index 4, name stats_reset: converting NULL to string is unsupported" (index 4 corresponds to the five-column query used for PostgreSQL 18 and later; the query for older versions selects more columns, so the reported index differs there), dropping all pg_stat_wal metrics for the pod and logging an error at every scrape interval. Scan stats_reset into sql.NullString and use the empty string as the metric label when the column is NULL. The actual WAL counters are then emitted normally. The pg_stat_wal scan logic is extracted into a package-private getPgStatWAL helper so the NULL behaviour can be exercised directly with sqlmock. 
Closes #9106 Signed-off-by: Honi Sanders --- pkg/management/postgres/probes.go | 48 +++++++----- pkg/management/postgres/probes_test.go | 74 +++++++++++++++++++ .../postgres/webserver/metricserver/wal.go | 16 ++-- 3 files changed, 110 insertions(+), 28 deletions(-) diff --git a/pkg/management/postgres/probes.go b/pkg/management/postgres/probes.go index 6b346c6346..85fb03ecca 100644 --- a/pkg/management/postgres/probes.go +++ b/pkg/management/postgres/probes.go @@ -26,6 +26,7 @@ import ( "path/filepath" "strings" + "github.com/blang/semver" "github.com/cloudnative-pg/machinery/pkg/fileutils" "github.com/cloudnative-pg/machinery/pkg/log" corev1 "k8s.io/api/core/v1" @@ -594,7 +595,7 @@ type PgStatWal struct { WalSync int64 WalWriteTime float64 WalSyncTime float64 - StatsReset string + StatsReset sql.NullString } // TryGetPgStatWAL retrieves pg_stat_wal on pg version 14 and further @@ -609,12 +610,21 @@ func (instance *Instance) TryGetPgStatWAL() (*PgStatWal, error) { return nil, err } - // Since PostgreSQL 18, `wal_write`, `wal_sync`, `wal_write_time` and - // `wal_sync_time` have been removed. - // See https://github.com/postgres/postgres/commit/2421e9a51d20bb83154e54a16ce628f9249fa907 + return getPgStatWAL(superUserDB, version) +} + +// getPgStatWAL scans pg_stat_wal from the given DB. The query shape depends on +// the major version because PostgreSQL 18 removed `wal_write`, `wal_sync`, +// `wal_write_time` and `wal_sync_time` (see +// https://github.com/postgres/postgres/commit/2421e9a51d20bb83154e54a16ce628f9249fa907). +// `stats_reset` is scanned as `sql.NullString` because it is legitimately NULL +// on instances that have never had WAL stats initialized — in particular +// replicas, and primaries that were promoted from a replica bootstrapped via +// `pg_basebackup`. 
+func getPgStatWAL(db *sql.DB, version semver.Version) (*PgStatWal, error) { var pgWalStat PgStatWal if version.Major < 18 { - row := superUserDB.QueryRow( + row := db.QueryRow( `SELECT wal_records, wal_fpi, @@ -639,28 +649,26 @@ func (instance *Instance) TryGetPgStatWAL() (*PgStatWal, error) { ); err != nil { return nil, err } + return &pgWalStat, nil } - if version.Major >= 18 { - row := superUserDB.QueryRow( - `SELECT - wal_records, + row := db.QueryRow( + `SELECT + wal_records, wal_fpi, wal_bytes, wal_buffers_full, stats_reset - FROM pg_catalog.pg_stat_wal`) - if err := row.Scan( - &pgWalStat.WalRecords, - &pgWalStat.WalFpi, - &pgWalStat.WalBytes, - &pgWalStat.WALBuffersFull, - &pgWalStat.StatsReset, - ); err != nil { - return nil, err - } + FROM pg_catalog.pg_stat_wal`) + if err := row.Scan( + &pgWalStat.WalRecords, + &pgWalStat.WalFpi, + &pgWalStat.WalBytes, + &pgWalStat.WALBuffersFull, + &pgWalStat.StatsReset, + ); err != nil { + return nil, err } - return &pgWalStat, nil } diff --git a/pkg/management/postgres/probes_test.go b/pkg/management/postgres/probes_test.go index 153780ba85..9c573fa947 100644 --- a/pkg/management/postgres/probes_test.go +++ b/pkg/management/postgres/probes_test.go @@ -267,3 +267,77 @@ var _ = Describe("probes", func() { }) }) }) + +var _ = Describe("getPgStatWAL", func() { + pgStatWalColumns := []string{ + "wal_records", + "wal_fpi", + "wal_bytes", + "wal_buffers_full", + "wal_write", + "wal_sync", + "wal_write_time", + "wal_sync_time", + "stats_reset", + } + + pg18Columns := []string{ + "wal_records", + "wal_fpi", + "wal_bytes", + "wal_buffers_full", + "stats_reset", + } + + It("returns the parsed stats when stats_reset is populated (PG<18)", func() { + db, mock, err := sqlmock.New() + Expect(err).ToNot(HaveOccurred()) + defer func() { _ = db.Close() }() + + mock.ExpectQuery(`FROM pg_catalog.pg_stat_wal`). + WillReturnRows(sqlmock.NewRows(pgStatWalColumns). 
+ AddRow(int64(10), int64(20), int64(30), int64(40), + int64(50), int64(60), float64(70), float64(80), + "2026-05-04 12:00:00+00")) + + stat, err := getPgStatWAL(db, semver.Version{Major: 17}) + Expect(err).ToNot(HaveOccurred()) + Expect(stat).NotTo(BeNil()) + Expect(stat.WalRecords).To(Equal(int64(10))) + Expect(stat.StatsReset.Valid).To(BeTrue()) + Expect(stat.StatsReset.String).To(Equal("2026-05-04 12:00:00+00")) + }) + + It("does not error when stats_reset is NULL (PG<18, replica or post-promotion primary)", func() { + db, mock, err := sqlmock.New() + Expect(err).ToNot(HaveOccurred()) + defer func() { _ = db.Close() }() + + mock.ExpectQuery(`FROM pg_catalog.pg_stat_wal`). + WillReturnRows(sqlmock.NewRows(pgStatWalColumns). + AddRow(int64(0), int64(0), int64(0), int64(0), + int64(0), int64(0), float64(0), float64(0), + nil)) + + stat, err := getPgStatWAL(db, semver.Version{Major: 17}) + Expect(err).ToNot(HaveOccurred()) + Expect(stat).NotTo(BeNil()) + Expect(stat.StatsReset.Valid).To(BeFalse()) + Expect(stat.StatsReset.String).To(Equal("")) + }) + + It("does not error when stats_reset is NULL (PG>=18)", func() { + db, mock, err := sqlmock.New() + Expect(err).ToNot(HaveOccurred()) + defer func() { _ = db.Close() }() + + mock.ExpectQuery(`FROM pg_catalog.pg_stat_wal`). + WillReturnRows(sqlmock.NewRows(pg18Columns). 
+ AddRow(int64(0), int64(0), int64(0), int64(0), nil)) + + stat, err := getPgStatWAL(db, semver.Version{Major: 18}) + Expect(err).ToNot(HaveOccurred()) + Expect(stat).NotTo(BeNil()) + Expect(stat.StatsReset.Valid).To(BeFalse()) + }) +}) diff --git a/pkg/management/postgres/webserver/metricserver/wal.go b/pkg/management/postgres/webserver/metricserver/wal.go index f8921cb000..f293f9ffb8 100644 --- a/pkg/management/postgres/webserver/metricserver/wal.go +++ b/pkg/management/postgres/webserver/metricserver/wal.go @@ -50,15 +50,15 @@ func collectPGStatWAL(e *Exporter) error { return err } walMetrics := e.Metrics.PgStatWalMetrics - walMetrics.WalRecords.WithLabelValues(walStat.StatsReset).Set(float64(walStat.WalRecords)) - walMetrics.WalFpi.WithLabelValues(walStat.StatsReset).Set(float64(walStat.WalFpi)) - walMetrics.WalBytes.WithLabelValues(walStat.StatsReset).Set(float64(walStat.WalBytes)) - walMetrics.WALBuffersFull.WithLabelValues(walStat.StatsReset).Set(float64(walStat.WALBuffersFull)) + walMetrics.WalRecords.WithLabelValues(walStat.StatsReset.String).Set(float64(walStat.WalRecords)) + walMetrics.WalFpi.WithLabelValues(walStat.StatsReset.String).Set(float64(walStat.WalFpi)) + walMetrics.WalBytes.WithLabelValues(walStat.StatsReset.String).Set(float64(walStat.WalBytes)) + walMetrics.WALBuffersFull.WithLabelValues(walStat.StatsReset.String).Set(float64(walStat.WALBuffersFull)) if version, _ := e.instance.GetPgVersion(); version.Major < 18 { - walMetrics.WalWrite.WithLabelValues(walStat.StatsReset).Set(float64(walStat.WalWrite)) - walMetrics.WalSync.WithLabelValues(walStat.StatsReset).Set(float64(walStat.WalSync)) - walMetrics.WalWriteTime.WithLabelValues(walStat.StatsReset).Set(walStat.WalWriteTime) - walMetrics.WalSyncTime.WithLabelValues(walStat.StatsReset).Set(walStat.WalSyncTime) + walMetrics.WalWrite.WithLabelValues(walStat.StatsReset.String).Set(float64(walStat.WalWrite)) + 
walMetrics.WalSync.WithLabelValues(walStat.StatsReset.String).Set(float64(walStat.WalSync)) + walMetrics.WalWriteTime.WithLabelValues(walStat.StatsReset.String).Set(walStat.WalWriteTime) + walMetrics.WalSyncTime.WithLabelValues(walStat.StatsReset.String).Set(walStat.WalSyncTime) } return nil