diff --git a/README.md b/README.md
index cd50b79e..7d678e4a 100644
--- a/README.md
+++ b/README.md
@@ -111,6 +111,9 @@ global:
max_idle_connections: 3
# Maximum amount of time a connection may be reused to any one target. Infinite by default.
max_connection_lifetime: 10m
+ # Expose per-query `query_duration_seconds` and `query_rows_returned` gauges, labelled with the
+ # `query` name (and `target` in multi-target mode). Off by default to keep the metric surface stable.
+ enable_query_metrics: false
# The target to monitor and the list of collectors to execute on it.
target:
@@ -217,6 +220,23 @@ Kubernetes-native ServiceMonitor automatically configures Prometheus for HTTPS s
## Miscellaneous
+
+Per-query observability metrics
+
+When `global.enable_query_metrics` is set to `true`, every scrape emits two additional gauges per query
+in the configuration:
+
+- `query_duration_seconds{query="<query_name>"}` — wall-clock time the query took during the most
+  recent scrape, including row scanning. Emitted even when the query errors, so spikes preceding a
+  failure remain visible.
+- `query_rows_returned{query="<query_name>"}` — number of rows the database returned during the most
+  recent scrape. Errored or skipped rows are not counted.
+
+Both metrics inherit the same constant labels as `up` / `scrape_duration_seconds` (notably `target` in
+multi-target / jobs mode), so they can be aggregated by target the same way. The feature is off by
+default to keep the existing metric surface unchanged.
+
+
Handling NULL values
diff --git a/collector.go b/collector.go
index 4668c85f..0ab7bbac 100644
--- a/collector.go
+++ b/collector.go
@@ -31,7 +31,7 @@ type collector struct {
// NewCollector returns a new Collector with the given configuration and database. The metrics it creates will all have
// the provided const labels applied.
-func NewCollector(logContext string, cc *config.CollectorConfig, constLabels []*dto.LabelPair) (Collector, errors.WithContext) {
+func NewCollector(logContext string, cc *config.CollectorConfig, constLabels []*dto.LabelPair, enableQueryMetrics bool) (Collector, errors.WithContext) {
logContext = TrimMissingCtx(fmt.Sprintf(`%s,collector=%s`, logContext, cc.Name))
// Maps each query to the list of metric families it populates.
@@ -53,7 +53,7 @@ func NewCollector(logContext string, cc *config.CollectorConfig, constLabels []*
// Instantiate queries.
queries := make([]*Query, 0, len(cc.Metrics))
for qc, mfs := range queryMFs {
- q, err := NewQuery(logContext, qc, mfs...)
+ q, err := NewQuery(logContext, qc, constLabels, enableQueryMetrics, mfs...)
if err != nil {
return nil, err
}
diff --git a/config/global_config.go b/config/global_config.go
index 9a12c074..2345d574 100644
--- a/config/global_config.go
+++ b/config/global_config.go
@@ -21,6 +21,8 @@ type GlobalConfig struct {
MaxConns int `yaml:"max_connections" env:"MAX_CONNECTIONS"` // maximum number of open connections to any one target
MaxIdleConns int `yaml:"max_idle_connections" env:"MAX_IDLE_CONNECTIONS"` // maximum number of idle connections to any one target
+ EnableQueryMetrics bool `yaml:"enable_query_metrics,omitempty" env:"ENABLE_QUERY_METRICS"` // expose per-query duration and row count metrics
+
// Catches all undefined fields and must be empty after parsing.
XXX map[string]any `yaml:",inline" json:"-"`
}
@@ -41,6 +43,7 @@ func (g *GlobalConfig) UnmarshalYAML(unmarshal func(any) error) error {
g.MaxIdleConns = 3
g.MaxConnLifetime = time.Duration(0)
g.WarmupDelay = model.Duration(0)
+ g.EnableQueryMetrics = false
type plain GlobalConfig
if err := unmarshal((*plain)(g)); err != nil {
diff --git a/query.go b/query.go
index 205312ad..a360caf4 100644
--- a/query.go
+++ b/query.go
@@ -5,10 +5,13 @@ import (
"database/sql"
"fmt"
"log/slog"
+ "sort"
"time"
"github.com/burningalchemist/sql_exporter/config"
"github.com/burningalchemist/sql_exporter/errors"
+ "github.com/prometheus/client_golang/prometheus"
+ dto "github.com/prometheus/client_model/go"
)
// Query wraps a sql.Stmt and all the metrics populated from it. It helps extract keys and values from result rows.
@@ -19,6 +22,9 @@ type Query struct {
columnTypes columnTypeMap
logContext string
+ durationDesc MetricDesc
+ rowsDesc MetricDesc
+
conn *sql.DB
stmt *sql.Stmt
}
@@ -35,7 +41,7 @@ const (
)
// NewQuery returns a new Query that will populate the given metric families.
-func NewQuery(logContext string, qc *config.QueryConfig, metricFamilies ...*MetricFamily) (*Query, errors.WithContext) {
+func NewQuery(logContext string, qc *config.QueryConfig, constLabels []*dto.LabelPair, enableQueryMetrics bool, metricFamilies ...*MetricFamily) (*Query, errors.WithContext) {
logContext = TrimMissingCtx(fmt.Sprintf(`%s,query=%s`, logContext, qc.Name))
columnTypes := make(columnTypeMap)
@@ -58,11 +64,30 @@ func NewQuery(logContext string, qc *config.QueryConfig, metricFamilies ...*Metr
}
}
+ var durationDesc, rowsDesc MetricDesc
+ if enableQueryMetrics {
+ autoLabels := make([]*dto.LabelPair, 0, len(constLabels)+1)
+ autoLabels = append(autoLabels, constLabels...)
+ queryName := qc.Name
+ queryLabel := queryLabelName
+ autoLabels = append(autoLabels, &dto.LabelPair{
+ Name: &queryLabel,
+ Value: &queryName,
+ })
+ sort.Sort(labelPairSorter(autoLabels))
+ durationDesc = NewAutomaticMetricDesc(logContext, queryDurationName, queryDurationHelp,
+ prometheus.GaugeValue, autoLabels)
+ rowsDesc = NewAutomaticMetricDesc(logContext, queryRowsName, queryRowsHelp,
+ prometheus.GaugeValue, autoLabels)
+ }
+
q := Query{
config: qc,
metricFamilies: metricFamilies,
columnTypes: columnTypes,
logContext: logContext,
+ durationDesc: durationDesc,
+ rowsDesc: rowsDesc,
}
return &q, nil
}
@@ -82,6 +107,15 @@ func setColumnType(logContext, columnName string, ctype columnType, columnTypes
// Collect is the equivalent of prometheus.Collector.Collect() but takes a context to run in and a database to run on.
func (q *Query) Collect(ctx context.Context, conn *sql.DB, ch chan<- Metric) {
+ start := time.Now()
+ var rowCount uint64
+ defer func() {
+ if q.durationDesc != nil {
+ ch <- NewMetric(q.durationDesc, time.Since(start).Seconds())
+ ch <- NewMetric(q.rowsDesc, float64(rowCount))
+ }
+ }()
+
if ctx.Err() != nil {
ch <- NewInvalidMetric(errors.Wrap(q.logContext, ctx.Err()))
@@ -114,6 +148,7 @@ func (q *Query) Collect(ctx context.Context, conn *sql.DB, ch chan<- Metric) {
ch <- NewInvalidMetric(err)
continue
}
+ rowCount++
for _, mf := range q.metricFamilies {
mf.Collect(row, ch)
}
diff --git a/query_test.go b/query_test.go
new file mode 100644
index 00000000..0d8e0657
--- /dev/null
+++ b/query_test.go
@@ -0,0 +1,81 @@
+package sql_exporter
+
+import (
+	"testing"
+
+	"github.com/burningalchemist/sql_exporter/config"
+	dto "github.com/prometheus/client_model/go"
+)
+
+// assertConstLabels fails the test unless desc is non-nil and carries exactly the const labels in want. Checking for
+// nil first turns a regression in NewQuery into a readable failure instead of a nil-interface panic.
+func assertConstLabels(t *testing.T, what string, desc MetricDesc, want map[string]string) {
+	t.Helper()
+	if desc == nil {
+		t.Fatalf("%s: desc is nil, want it set when query metrics are enabled", what)
+	}
+	got := desc.ConstLabels()
+	if len(got) != len(want) {
+		t.Fatalf("%s: expected %d const labels, got %d", what, len(want), len(got))
+	}
+	// Track names so a duplicated label cannot hide a missing one behind a matching length.
+	seen := make(map[string]bool, len(got))
+	for _, lp := range got {
+		name := lp.GetName()
+		if seen[name] {
+			t.Errorf("%s: duplicate const label %q", what, name)
+		}
+		seen[name] = true
+		if wantVal, ok := want[name]; !ok || lp.GetValue() != wantVal {
+			t.Errorf("%s: unexpected const label %s=%q, want labels %v", what, name, lp.GetValue(), want)
+		}
+	}
+}
+
+// TestNewQueryAutoMetricsDisabled verifies that NewQuery leaves both auto-metric descs unset when the feature is off.
+func TestNewQueryAutoMetricsDisabled(t *testing.T) {
+	q, err := NewQuery("", &config.QueryConfig{Name: "q1", Query: "SELECT 1"}, nil, false)
+	if err != nil {
+		t.Fatalf("NewQuery: %v", err)
+	}
+	if q.durationDesc != nil || q.rowsDesc != nil {
+		t.Fatalf("expected no auto-metric descs when disabled, got duration=%v rows=%v", q.durationDesc, q.rowsDesc)
+	}
+}
+
+// TestNewQueryAutoMetricsEnabled verifies that, with the feature on, both descs are created under the expected
+// metric names and carry the caller's const labels plus the query label.
+func TestNewQueryAutoMetricsEnabled(t *testing.T) {
+	targetName, targetVal := "target", "db1"
+	constLabels := []*dto.LabelPair{{Name: &targetName, Value: &targetVal}}
+
+	q, err := NewQuery("", &config.QueryConfig{Name: "q1", Query: "SELECT 1"}, constLabels, true)
+	if err != nil {
+		t.Fatalf("NewQuery: %v", err)
+	}
+	if q.durationDesc == nil || q.rowsDesc == nil {
+		t.Fatalf("expected auto-metric descs to be set when enabled")
+	}
+	if got := q.durationDesc.Name(); got != queryDurationName {
+		t.Errorf("duration metric name = %q, want %q", got, queryDurationName)
+	}
+	if got := q.rowsDesc.Name(); got != queryRowsName {
+		t.Errorf("rows metric name = %q, want %q", got, queryRowsName)
+	}
+
+	// Both descs are built from the same label set, so both must expose it.
+	want := map[string]string{"target": "db1", queryLabelName: "q1"}
+	assertConstLabels(t, "duration", q.durationDesc, want)
+	assertConstLabels(t, "rows", q.rowsDesc, want)
+}
+
+// TestNewQueryAutoMetricsEnabledNoConstLabels verifies that, without caller-supplied const labels, the descs carry
+// only the query label.
+func TestNewQueryAutoMetricsEnabledNoConstLabels(t *testing.T) {
+	q, err := NewQuery("", &config.QueryConfig{Name: "singleton", Query: "SELECT 1"}, nil, true)
+	if err != nil {
+		t.Fatalf("NewQuery: %v", err)
+	}
+	want := map[string]string{queryLabelName: "singleton"}
+	assertConstLabels(t, "duration", q.durationDesc, want)
+	assertConstLabels(t, "rows", q.rowsDesc, want)
+}
diff --git a/target.go b/target.go
index 75fb0e31..11608e81 100644
--- a/target.go
+++ b/target.go
@@ -24,6 +24,11 @@ const (
upMetricHelp = "1 if the target is reachable, or 0 if the scrape failed"
scrapeDurationName = "scrape_duration_seconds"
scrapeDurationHelp = "How long it took to scrape the target in seconds"
+ queryDurationName = "query_duration_seconds"
+ queryDurationHelp = "How long the named query took to execute in seconds (last scrape)"
+ queryRowsName = "query_rows_returned"
+ queryRowsHelp = "Number of rows returned by the named query (last scrape)"
+ queryLabelName = "query"
)
// Target collects SQL metrics from a single sql.DB instance. It aggregates one or more Collectors and it looks much
@@ -86,7 +91,7 @@ func NewTarget(
collectors := make([]Collector, 0, len(ccs))
for _, cc := range ccs {
- c, err := NewCollector(logContext, cc, constLabelPairs)
+ c, err := NewCollector(logContext, cc, constLabelPairs, gc.EnableQueryMetrics)
if err != nil {
return nil, err
}