Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ global:
max_idle_connections: 3
# Maximum amount of time a connection may be reused to any one target. Infinite by default.
max_connection_lifetime: 10m
# Expose per-query `query_duration_seconds` and `query_rows_returned` gauges, labelled with the
# `query` name (and `target` in multi-target mode). Off by default to keep the metric surface stable.
enable_query_metrics: false

# The target to monitor and the list of collectors to execute on it.
target:
Expand Down Expand Up @@ -217,6 +220,23 @@ Kubernetes-native ServiceMonitor automatically configures Prometheus for HTTPS s

## Miscellaneous

<details>
<summary>Per-query observability metrics</summary>

When `global.enable_query_metrics` is set to `true`, every scrape emits two additional gauges per query
in the configuration:

- `query_duration_seconds{query="<query_name>"}` — wall-clock time the query took during the most
recent scrape, including row scanning. Emitted even when the query errors, so spikes preceding a
failure remain visible.
- `query_rows_returned{query="<query_name>"}` — number of result rows successfully processed during the most
recent scrape. Rows that error out or are skipped are not counted.

Both metrics inherit the same constant labels as `up` / `scrape_duration_seconds` (notably `target` in
multi-target / jobs mode), so they can be aggregated by target the same way. The feature is off by
default to keep the existing metric surface unchanged.
</details>

<details>
<summary>Handling NULL values</summary>

Expand Down
4 changes: 2 additions & 2 deletions collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type collector struct {

// NewCollector returns a new Collector with the given configuration and database. The metrics it creates will all have
// the provided const labels applied. The enableQueryMetrics flag is passed through to every query the collector
// instantiates.
func NewCollector(logContext string, cc *config.CollectorConfig, constLabels []*dto.LabelPair) (Collector, errors.WithContext) {
func NewCollector(logContext string, cc *config.CollectorConfig, constLabels []*dto.LabelPair, enableQueryMetrics bool) (Collector, errors.WithContext) {
logContext = TrimMissingCtx(fmt.Sprintf(`%s,collector=%s`, logContext, cc.Name))

// Maps each query to the list of metric families it populates.
Expand All @@ -53,7 +53,7 @@ func NewCollector(logContext string, cc *config.CollectorConfig, constLabels []*
// Instantiate queries.
queries := make([]*Query, 0, len(cc.Metrics))
for qc, mfs := range queryMFs {
q, err := NewQuery(logContext, qc, mfs...)
q, err := NewQuery(logContext, qc, constLabels, enableQueryMetrics, mfs...)
if err != nil {
return nil, err
}
Expand Down
3 changes: 3 additions & 0 deletions config/global_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ type GlobalConfig struct {
MaxConns int `yaml:"max_connections" env:"MAX_CONNECTIONS"` // maximum number of open connections to any one target
MaxIdleConns int `yaml:"max_idle_connections" env:"MAX_IDLE_CONNECTIONS"` // maximum number of idle connections to any one target

EnableQueryMetrics bool `yaml:"enable_query_metrics,omitempty" env:"ENABLE_QUERY_METRICS"` // expose per-query duration and row count metrics

// Catches all undefined fields and must be empty after parsing.
XXX map[string]any `yaml:",inline" json:"-"`
}
Expand All @@ -41,6 +43,7 @@ func (g *GlobalConfig) UnmarshalYAML(unmarshal func(any) error) error {
g.MaxIdleConns = 3
g.MaxConnLifetime = time.Duration(0)
g.WarmupDelay = model.Duration(0)
g.EnableQueryMetrics = false

type plain GlobalConfig
if err := unmarshal((*plain)(g)); err != nil {
Expand Down
37 changes: 36 additions & 1 deletion query.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ import (
"database/sql"
"fmt"
"log/slog"
"sort"
"time"

"github.com/burningalchemist/sql_exporter/config"
"github.com/burningalchemist/sql_exporter/errors"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)

// Query wraps a sql.Stmt and all the metrics populated from it. It helps extract keys and values from result rows.
Expand All @@ -19,6 +22,9 @@ type Query struct {
columnTypes columnTypeMap
logContext string

durationDesc MetricDesc
rowsDesc MetricDesc

conn *sql.DB
stmt *sql.Stmt
}
Expand All @@ -35,7 +41,7 @@ const (
)

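// NewQuery returns a new Query that will populate the given metric families. When enableQueryMetrics is true, it also
// prepares the per-query duration and row-count metric descriptors, labelled with constLabels plus a `query` label
// holding the query name.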
func NewQuery(logContext string, qc *config.QueryConfig, metricFamilies ...*MetricFamily) (*Query, errors.WithContext) {
func NewQuery(logContext string, qc *config.QueryConfig, constLabels []*dto.LabelPair, enableQueryMetrics bool, metricFamilies ...*MetricFamily) (*Query, errors.WithContext) {
logContext = TrimMissingCtx(fmt.Sprintf(`%s,query=%s`, logContext, qc.Name))

columnTypes := make(columnTypeMap)
Expand All @@ -58,11 +64,30 @@ func NewQuery(logContext string, qc *config.QueryConfig, metricFamilies ...*Metr
}
}

var durationDesc, rowsDesc MetricDesc
if enableQueryMetrics {
autoLabels := make([]*dto.LabelPair, 0, len(constLabels)+1)
autoLabels = append(autoLabels, constLabels...)
queryName := qc.Name
queryLabel := queryLabelName
autoLabels = append(autoLabels, &dto.LabelPair{
Name: &queryLabel,
Value: &queryName,
})
sort.Sort(labelPairSorter(autoLabels))
durationDesc = NewAutomaticMetricDesc(logContext, queryDurationName, queryDurationHelp,
prometheus.GaugeValue, autoLabels)
rowsDesc = NewAutomaticMetricDesc(logContext, queryRowsName, queryRowsHelp,
prometheus.GaugeValue, autoLabels)
}

q := Query{
config: qc,
metricFamilies: metricFamilies,
columnTypes: columnTypes,
logContext: logContext,
durationDesc: durationDesc,
rowsDesc: rowsDesc,
}
return &q, nil
}
Expand All @@ -82,6 +107,15 @@ func setColumnType(logContext, columnName string, ctype columnType, columnTypes

// Collect is the equivalent of prometheus.Collector.Collect() but takes a context to run in and a database to run on.
func (q *Query) Collect(ctx context.Context, conn *sql.DB, ch chan<- Metric) {
start := time.Now()
var rowCount uint64
defer func() {
if q.durationDesc != nil {
ch <- NewMetric(q.durationDesc, time.Since(start).Seconds())
ch <- NewMetric(q.rowsDesc, float64(rowCount))
}
}()

if ctx.Err() != nil {
ch <- NewInvalidMetric(errors.Wrap(q.logContext, ctx.Err()))

Expand Down Expand Up @@ -114,6 +148,7 @@ func (q *Query) Collect(ctx context.Context, conn *sql.DB, ch chan<- Metric) {
ch <- NewInvalidMetric(err)
continue
}
rowCount++
for _, mf := range q.metricFamilies {
mf.Collect(row, ch)
}
Expand Down
66 changes: 66 additions & 0 deletions query_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package sql_exporter

import (
"testing"

"github.com/burningalchemist/sql_exporter/config"
dto "github.com/prometheus/client_model/go"
)

func TestNewQueryAutoMetricsDisabled(t *testing.T) {
q, err := NewQuery("", &config.QueryConfig{Name: "q1", Query: "SELECT 1"}, nil, false)
if err != nil {
t.Fatalf("NewQuery: %v", err)
}
if q.durationDesc != nil || q.rowsDesc != nil {
t.Fatalf("expected no auto-metric descs when disabled, got duration=%v rows=%v", q.durationDesc, q.rowsDesc)
}
}

// TestNewQueryAutoMetricsEnabled checks that, with enableQueryMetrics set, NewQuery builds both per-query metric
// descriptors with the expected names, and that each of them carries the caller-supplied const label plus the
// `query` label holding the query name.
func TestNewQueryAutoMetricsEnabled(t *testing.T) {
	targetName, targetVal := "target", "db1"
	constLabels := []*dto.LabelPair{{Name: &targetName, Value: &targetVal}}

	q, err := NewQuery("", &config.QueryConfig{Name: "q1", Query: "SELECT 1"}, constLabels, true)
	if err != nil {
		t.Fatalf("NewQuery: %v", err)
	}
	if q.durationDesc == nil || q.rowsDesc == nil {
		t.Fatalf("expected auto-metric descs to be set when enabled")
	}

	// Run identical checks against both descriptors so a regression in either one is caught.
	checks := []struct {
		kind     string
		desc     MetricDesc
		wantName string
	}{
		{kind: "duration", desc: q.durationDesc, wantName: queryDurationName},
		{kind: "rows", desc: q.rowsDesc, wantName: queryRowsName},
	}
	for _, c := range checks {
		if got := c.desc.Name(); got != c.wantName {
			t.Errorf("%s metric name = %q, want %q", c.kind, got, c.wantName)
		}

		gotLabels := c.desc.ConstLabels()
		if len(gotLabels) != 2 {
			t.Errorf("%s metric: expected 2 const labels (target, query), got %d", c.kind, len(gotLabels))
			continue
		}

		// Index the labels by name so the assertions do not depend on label ordering.
		labels := make(map[string]string, len(gotLabels))
		for _, lp := range gotLabels {
			labels[lp.GetName()] = lp.GetValue()
		}
		if labels[queryLabelName] != "q1" {
			t.Errorf("%s metric: query label = %q, want q1", c.kind, labels[queryLabelName])
		}
		if labels["target"] != "db1" {
			t.Errorf("%s metric: target label = %q, want db1", c.kind, labels["target"])
		}
	}
}

// TestNewQueryAutoMetricsEnabledNoConstLabels checks that, when no const labels are supplied, both per-query metric
// descriptors carry exactly one const label: `query`, set to the query name.
func TestNewQueryAutoMetricsEnabledNoConstLabels(t *testing.T) {
	q, err := NewQuery("", &config.QueryConfig{Name: "singleton", Query: "SELECT 1"}, nil, true)
	if err != nil {
		t.Fatalf("NewQuery: %v", err)
	}
	// Guard before dereferencing: an unset descriptor should be reported as a test failure, not hit by the
	// ConstLabels call below.
	if q.durationDesc == nil || q.rowsDesc == nil {
		t.Fatalf("expected auto-metric descs to be set when enabled")
	}

	for _, desc := range []MetricDesc{q.durationDesc, q.rowsDesc} {
		gotLabels := desc.ConstLabels()
		if len(gotLabels) != 1 {
			t.Fatalf("%s: expected just the query label, got %d labels", desc.Name(), len(gotLabels))
		}
		if gotLabels[0].GetName() != queryLabelName || gotLabels[0].GetValue() != "singleton" {
			t.Errorf("%s: expected query=singleton, got %s=%s", desc.Name(), gotLabels[0].GetName(), gotLabels[0].GetValue())
		}
	}
}
7 changes: 6 additions & 1 deletion target.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ const (
upMetricHelp = "1 if the target is reachable, or 0 if the scrape failed"
scrapeDurationName = "scrape_duration_seconds"
scrapeDurationHelp = "How long it took to scrape the target in seconds"
queryDurationName = "query_duration_seconds"
queryDurationHelp = "How long the named query took to execute in seconds (last scrape)"
queryRowsName = "query_rows_returned"
queryRowsHelp = "Number of rows returned by the named query (last scrape)"
queryLabelName = "query"
)

// Target collects SQL metrics from a single sql.DB instance. It aggregates one or more Collectors and it looks much
Expand Down Expand Up @@ -86,7 +91,7 @@ func NewTarget(

collectors := make([]Collector, 0, len(ccs))
for _, cc := range ccs {
c, err := NewCollector(logContext, cc, constLabelPairs)
c, err := NewCollector(logContext, cc, constLabelPairs, gc.EnableQueryMetrics)
if err != nil {
return nil, err
}
Expand Down
Loading