Mirror of https://github.com/prometheus/alertmanager (synced 2024-12-25 23:52:12 +00:00)

Signed-off-by: Max Neverov <neverov.max@gmail.com>
parent 3974abd0f4
commit c39b787800
@@ -213,9 +213,11 @@ type NotificationLog interface {
 }
 
 type metrics struct {
-	numNotifications           *prometheus.CounterVec
-	numFailedNotifications     *prometheus.CounterVec
-	notificationLatencySeconds *prometheus.HistogramVec
+	numNotifications                   *prometheus.CounterVec
+	numTotalFailedNotifications        *prometheus.CounterVec
+	numNotificationRequestsTotal       *prometheus.CounterVec
+	numNotificationRequestsFailedTotal *prometheus.CounterVec
+	notificationLatencySeconds         *prometheus.HistogramVec
 }
 
 func newMetrics(r prometheus.Registerer) *metrics {
@@ -225,11 +227,21 @@ func newMetrics(r prometheus.Registerer) *metrics {
 			Name:      "notifications_total",
 			Help:      "The total number of attempted notifications.",
 		}, []string{"integration"}),
-		numFailedNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{
+		numTotalFailedNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{
 			Namespace: "alertmanager",
 			Name:      "notifications_failed_total",
 			Help:      "The total number of failed notifications.",
 		}, []string{"integration"}),
+		numNotificationRequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Namespace: "alertmanager",
+			Name:      "notification_requests_total",
+			Help:      "The total number of attempted notification requests.",
+		}, []string{"integration"}),
+		numNotificationRequestsFailedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Namespace: "alertmanager",
+			Name:      "notification_requests_failed_total",
+			Help:      "The total number of failed notification requests.",
+		}, []string{"integration"}),
 		notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{
 			Namespace: "alertmanager",
 			Name:      "notification_latency_seconds",
@@ -248,10 +260,16 @@ func newMetrics(r prometheus.Registerer) *metrics {
 		"victorops",
 	} {
 		m.numNotifications.WithLabelValues(integration)
-		m.numFailedNotifications.WithLabelValues(integration)
+		m.numTotalFailedNotifications.WithLabelValues(integration)
+		m.numNotificationRequestsTotal.WithLabelValues(integration)
+		m.numNotificationRequestsFailedTotal.WithLabelValues(integration)
 		m.notificationLatencySeconds.WithLabelValues(integration)
 	}
-	r.MustRegister(m.numNotifications, m.numFailedNotifications, m.notificationLatencySeconds)
+	r.MustRegister(
+		m.numNotifications, m.numTotalFailedNotifications,
+		m.numNotificationRequestsTotal, m.numNotificationRequestsFailedTotal,
+		m.notificationLatencySeconds,
+	)
 	return m
 }
 
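Taken together, these hunks split the per-integration counters into two families: notifications_total and notifications_failed_total count whole notifications (one per pipeline execution), while the new notification_requests_total and notification_requests_failed_total count individual delivery attempts, so retries become visible in the metrics. The following is a minimal, self-contained sketch of the same registration pattern with client_golang; names and values are illustrative only, not the Alertmanager code.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	reg := prometheus.NewRegistry()

	// One increment per logical notification (a group of alerts handed to an integration).
	notifications := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "alertmanager",
		Name:      "notifications_total",
		Help:      "The total number of attempted notifications.",
	}, []string{"integration"})

	// One increment per delivery attempt, so retries show up separately.
	requests := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "alertmanager",
		Name:      "notification_requests_total",
		Help:      "The total number of attempted notification requests.",
	}, []string{"integration"})

	reg.MustRegister(notifications, requests)

	// A single notification to the webhook integration that needed three attempts
	// increments notifications_total once and notification_requests_total three times.
	notifications.WithLabelValues("webhook").Inc()
	for i := 0; i < 3; i++ {
		requests.WithLabelValues("webhook").Inc()
	}

	fmt.Println("registered and incremented example counters")
}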
@@ -389,7 +407,7 @@ func NewGossipSettleStage(p *cluster.Peer) *GossipSettleStage {
 	return &GossipSettleStage{peer: p}
 }
 
-func (n *GossipSettleStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
+func (n *GossipSettleStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
 	if n.peer != nil {
 		n.peer.WaitReady()
 	}
@@ -407,7 +425,7 @@ func NewMuteStage(m types.Muter) *MuteStage {
 }
 
 // Exec implements the Stage interface.
-func (n *MuteStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
+func (n *MuteStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
 	var filtered []*types.Alert
 	for _, a := range alerts {
 		// TODO(fabxc): increment total alerts counter.
@@ -434,7 +452,7 @@ func NewWaitStage(wait func() time.Duration) *WaitStage {
 }
 
 // Exec implements the Stage interface.
-func (ws *WaitStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
+func (ws *WaitStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
 	select {
 	case <-time.After(ws.wait()):
 	case <-ctx.Done():
@@ -541,7 +559,7 @@ func (n *DedupStage) needsUpdate(entry *nflogpb.Entry, firing, resolved map[uint
 }
 
 // Exec implements the Stage interface.
-func (n *DedupStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
+func (n *DedupStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
 	gkey, ok := GroupKey(ctx)
 	if !ok {
 		return ctx, nil, errors.New("group key missing")
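The four hunks above only rename the unused logger parameter to the blank identifier; the methods still satisfy the Stage interface because Go matches parameter types, not parameter names. A tiny illustration of that convention, with a hypothetical interface standing in for Stage:

package main

import "fmt"

type Stage interface {
	Exec(msg string) error
}

// noopStage ignores its argument; naming it _ documents that it is intentionally unused.
type noopStage struct{}

func (noopStage) Exec(_ string) error { return nil }

func main() {
	var s Stage = noopStage{}
	fmt.Println(s.Exec("ignored"))
}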
@@ -609,8 +627,16 @@ func NewRetryStage(i Integration, groupName string, metrics *metrics) *RetryStag
 	}
 }
 
-// Exec implements the Stage interface.
 func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
+	r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc()
+	ctx, alerts, err := r.exec(ctx, l, alerts...)
+	if err != nil {
+		r.metrics.numTotalFailedNotifications.WithLabelValues(r.integration.Name()).Inc()
+	}
+	return ctx, alerts, err
+}
+
+func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
 	var sent []*types.Alert
 
 	// If we shouldn't send notifications for resolved alerts, but there are only
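This hunk wraps the retry loop: the exported Exec now records one attempted notification (and, on error, one failed notification), then delegates to the unexported exec, whose per-attempt counters are updated in the next hunk. A simplified sketch of that wrapper pattern follows, with hypothetical types and plain integer counters standing in for the real Integration and metrics types.

package main

import (
	"errors"
	"fmt"
)

type counters struct {
	notifications       int // one per Exec call
	failedNotifications int // one per Exec call that ultimately fails
	requests            int // one per delivery attempt inside exec
	failedRequests      int // one per failed delivery attempt
}

type retryStage struct {
	send    func() error // stands in for integration.Notify
	retries int
	m       *counters
}

// Exec counts the logical notification once, no matter how many attempts exec makes.
func (r retryStage) Exec() error {
	r.m.notifications++
	err := r.exec()
	if err != nil {
		r.m.failedNotifications++
	}
	return err
}

// exec counts every individual request, so retries show up in the request counters.
func (r retryStage) exec() error {
	var err error
	for i := 0; i <= r.retries; i++ {
		r.m.requests++
		if err = r.send(); err == nil {
			return nil
		}
		r.m.failedRequests++
	}
	return err
}

func main() {
	m := &counters{}
	calls := 0
	r := retryStage{
		// Fail twice, then succeed, to show the two counter families diverging.
		send: func() error {
			calls++
			if calls < 3 {
				return errors.New("temporary failure")
			}
			return nil
		},
		retries: 5,
		m:       m,
	}
	_ = r.Exec()
	fmt.Printf("%+v\n", *m) // prints {notifications:1 failedNotifications:0 requests:3 failedRequests:2}
}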
@@ -663,9 +689,9 @@ func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Ale
 			now := time.Now()
 			retry, err := r.integration.Notify(ctx, sent...)
 			r.metrics.notificationLatencySeconds.WithLabelValues(r.integration.Name()).Observe(time.Since(now).Seconds())
-			r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc()
+			r.metrics.numNotificationRequestsTotal.WithLabelValues(r.integration.Name()).Inc()
 			if err != nil {
-				r.metrics.numFailedNotifications.WithLabelValues(r.integration.Name()).Inc()
+				r.metrics.numNotificationRequestsFailedTotal.WithLabelValues(r.integration.Name()).Inc()
 				if !retry {
 					return ctx, alerts, errors.Wrapf(err, "%s/%s: notify retry canceled due to unrecoverable error after %d attempts", r.groupName, r.integration.String(), i)
 				}