From 0211bce95669d437410308783fce043f5d3ce1c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Santiago=20Hern=C3=A1ndez?=
Date: Tue, 11 Nov 2025 13:47:37 +0100
Subject: [PATCH] Log a warning if the peer timeout for the replica is greater
 than the repeat interval for a route

---
 notify/grafana_alertmanager.go | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/notify/grafana_alertmanager.go b/notify/grafana_alertmanager.go
index 59c325888..d140aa313 100644
--- a/notify/grafana_alertmanager.go
+++ b/notify/grafana_alertmanager.go
@@ -711,6 +711,11 @@ func (am *GrafanaAlertmanager) ApplyConfig(cfg NotificationsConfiguration) (err
 		return err
 	}
 
+	if pos := am.opts.Peer.Position(); pos != 0 {
+		waitTime := time.Duration(pos) * am.opts.PeerTimeout
+		am.checkRepeatIntervals(waitTime, cfg.RoutingTree)
+	}
+
 	// Now, let's put together our notification pipeline
 	routingStage := make(notify.RoutingStage, len(integrationsMap))
 
@@ -769,6 +774,17 @@ func (am *GrafanaAlertmanager) ApplyConfig(cfg NotificationsConfiguration) (err
 	return nil
 }
 
+// checkRepeatIntervals walks route and all of its nested routes, logging a warning for each one whose repeat_interval is set and shorter than waitTime (pos * peer_timeout).
+// If the time we wait before trying to send a notification is greater than the repeat interval, we risk sending duplicate notifications.
+func (am *GrafanaAlertmanager) checkRepeatIntervals(waitTime time.Duration, route *Route) {
+	if route.RepeatInterval != nil && time.Duration(*route.RepeatInterval) < waitTime {
+		level.Warn(am.logger).Log("msg", "Route's repeat_interval is shorter than the waiting period for the current peer. This can lead to duplicate notifications", "repeat_interval", *route.RepeatInterval, "wait_time_for_peer", waitTime)
+	}
+	for _, r := range route.Routes {
+		am.checkRepeatIntervals(waitTime, r)
+	}
+}
+
 func (am *GrafanaAlertmanager) setInhibitionRulesMetrics(r []InhibitRule) {
 	am.opts.Metrics.configuredInhibitionRules.WithLabelValues(am.tenantString()).Set(float64(len(r)))
 }