Add duration to the notify success message (#3559)

This commit updates Alertmanager to add a duration to the notify
success message. It complements the existing histogram to offer
fine-grained information about notification attempts. This can be
useful when debuggin duplicate notifications, for example, when
the duration exceeds peer_timeout.

Signed-off-by: George Robinson <george.robinson@grafana.com>
This commit is contained in:
George Robinson 2023-10-17 09:50:33 +01:00 committed by GitHub
parent bdcff29194
commit 98290c3349
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -792,7 +792,8 @@ func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Ale
case <-tick.C:
now := time.Now()
retry, err := r.integration.Notify(ctx, sent...)
r.metrics.notificationLatencySeconds.WithLabelValues(r.labelValues...).Observe(time.Since(now).Seconds())
dur := time.Since(now)
r.metrics.notificationLatencySeconds.WithLabelValues(r.labelValues...).Observe(dur.Seconds())
r.metrics.numNotificationRequestsTotal.WithLabelValues(r.labelValues...).Inc()
if err != nil {
r.metrics.numNotificationRequestsFailedTotal.WithLabelValues(r.labelValues...).Inc()
@ -813,7 +814,7 @@ func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Ale
lvl = level.Debug(log.With(l, "alerts", fmt.Sprintf("%v", alerts)))
}
lvl.Log("msg", "Notify success", "attempts", i)
lvl.Log("msg", "Notify success", "attempts", i, "duration", dur)
return ctx, alerts, nil
}
case <-ctx.Done():