notify: don't use the global metrics registry (#1977)

* notify: don't use the global metrics registry

Signed-off-by: Simon Pasquier <spasquie@redhat.com>

* Address Max's comment

Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
Simon Pasquier 2019-08-26 16:37:13 +02:00 committed by GitHub
parent 40b3facdf6
commit 9f7f4ead46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 76 additions and 65 deletions

View File

@ -371,6 +371,7 @@ func run() int {
tmpl *template.Template tmpl *template.Template
) )
pipelineBuilder := notify.NewPipelineBuilder(prometheus.DefaultRegisterer)
configCoordinator := config.NewCoordinator( configCoordinator := config.NewCoordinator(
*configFile, *configFile,
prometheus.DefaultRegisterer, prometheus.DefaultRegisterer,
@ -399,7 +400,7 @@ func run() int {
inhibitor = inhibit.NewInhibitor(alerts, conf.InhibitRules, marker, logger) inhibitor = inhibit.NewInhibitor(alerts, conf.InhibitRules, marker, logger)
silencer := silence.NewSilencer(silences, marker, logger) silencer := silence.NewSilencer(silences, marker, logger)
pipeline := notify.BuildPipeline( pipeline := pipelineBuilder.New(
receivers, receivers,
waitFunc, waitFunc,
inhibitor, inhibitor,

View File

@ -35,61 +35,6 @@ import (
"github.com/prometheus/alertmanager/types" "github.com/prometheus/alertmanager/types"
) )
// Notification metrics, each partitioned by the "integration" label.
var (
	// numNotifications counts every attempted notification.
	numNotifications = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "alertmanager",
			Name:      "notifications_total",
			Help:      "The total number of attempted notifications.",
		},
		[]string{"integration"},
	)

	// numFailedNotifications counts the notifications that failed.
	numFailedNotifications = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "alertmanager",
			Name:      "notifications_failed_total",
			Help:      "The total number of failed notifications.",
		},
		[]string{"integration"},
	)

	// notificationLatencySeconds records notification latency in seconds.
	notificationLatencySeconds = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "alertmanager",
			Name:      "notification_latency_seconds",
			Help:      "The latency of notifications in seconds.",
			Buckets:   []float64{1, 5, 10, 15, 20},
		},
		[]string{"integration"},
	)
)
// init creates the per-integration child of every notification metric and
// then registers the three collectors through prometheus.MustRegister.
func init() {
	integrations := []string{
		"email",
		"hipchat",
		"pagerduty",
		"wechat",
		"pushover",
		"slack",
		"opsgenie",
		"webhook",
		"victorops",
	}

	// The returned children are discarded on purpose: the calls are made only
	// so that a child exists for each known integration.
	for _, name := range integrations {
		numNotifications.WithLabelValues(name)
	}
	for _, name := range integrations {
		numFailedNotifications.WithLabelValues(name)
	}
	for _, name := range integrations {
		notificationLatencySeconds.WithLabelValues(name)
	}

	prometheus.MustRegister(
		numNotifications,
		numFailedNotifications,
		notificationLatencySeconds,
	)
}
// ResolvedSender returns true if resolved notifications should be sent. // ResolvedSender returns true if resolved notifications should be sent.
type ResolvedSender interface { type ResolvedSender interface {
SendResolved() bool SendResolved() bool
@ -261,8 +206,62 @@ type NotificationLog interface {
Query(params ...nflog.QueryParam) ([]*nflogpb.Entry, error) Query(params ...nflog.QueryParam) ([]*nflogpb.Entry, error)
} }
// BuildPipeline builds a map of receivers to Stages. type metrics struct {
func BuildPipeline( numNotifications *prometheus.CounterVec
numFailedNotifications *prometheus.CounterVec
notificationLatencySeconds *prometheus.HistogramVec
}
// newMetrics creates the notification metrics, pre-creates a child for each
// known integration and registers all three collectors with r. Registration
// goes through r.MustRegister, so a registration error results in a panic.
func newMetrics(r prometheus.Registerer) *metrics {
	attempted := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "alertmanager",
			Name:      "notifications_total",
			Help:      "The total number of attempted notifications.",
		},
		[]string{"integration"},
	)
	failed := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "alertmanager",
			Name:      "notifications_failed_total",
			Help:      "The total number of failed notifications.",
		},
		[]string{"integration"},
	)
	latency := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "alertmanager",
			Name:      "notification_latency_seconds",
			Help:      "The latency of notifications in seconds.",
			Buckets:   []float64{1, 5, 10, 15, 20},
		},
		[]string{"integration"},
	)

	integrations := []string{
		"email",
		"hipchat",
		"pagerduty",
		"wechat",
		"pushover",
		"slack",
		"opsgenie",
		"webhook",
		"victorops",
	}
	// The returned children are discarded on purpose: the calls are made only
	// so that a child exists for each known integration.
	for _, name := range integrations {
		attempted.WithLabelValues(name)
		failed.WithLabelValues(name)
		latency.WithLabelValues(name)
	}

	r.MustRegister(attempted, failed, latency)

	return &metrics{
		numNotifications:           attempted,
		numFailedNotifications:     failed,
		notificationLatencySeconds: latency,
	}
}
// PipelineBuilder builds notification pipelines. It holds the metrics that
// its New method passes on to every receiver stage it creates.
type PipelineBuilder struct {
// metrics is the set of notification metrics shared by all built pipelines.
metrics *metrics
}
// NewPipelineBuilder returns a PipelineBuilder whose notification metrics are
// created by newMetrics and registered with r.
func NewPipelineBuilder(r prometheus.Registerer) *PipelineBuilder {
	m := newMetrics(r)
	return &PipelineBuilder{metrics: m}
}
// New returns a map of receivers to Stages.
func (pb *PipelineBuilder) New(
receivers map[string][]Integration, receivers map[string][]Integration,
wait func() time.Duration, wait func() time.Duration,
inhibitor *inhibit.Inhibitor, inhibitor *inhibit.Inhibitor,
@ -277,14 +276,20 @@ func BuildPipeline(
ss := NewMuteStage(silencer) ss := NewMuteStage(silencer)
for name := range receivers { for name := range receivers {
st := createReceiverStage(name, receivers[name], wait, notificationLog) st := createReceiverStage(name, receivers[name], wait, notificationLog, pb.metrics)
rs[name] = MultiStage{ms, is, ss, st} rs[name] = MultiStage{ms, is, ss, st}
} }
return rs return rs
} }
// createReceiverStage creates a pipeline of stages for a receiver. // createReceiverStage creates a pipeline of stages for a receiver.
func createReceiverStage(name string, integrations []Integration, wait func() time.Duration, notificationLog NotificationLog) Stage { func createReceiverStage(
name string,
integrations []Integration,
wait func() time.Duration,
notificationLog NotificationLog,
metrics *metrics,
) Stage {
var fs FanoutStage var fs FanoutStage
for i := range integrations { for i := range integrations {
recv := &nflogpb.Receiver{ recv := &nflogpb.Receiver{
@ -295,7 +300,7 @@ func createReceiverStage(name string, integrations []Integration, wait func() ti
var s MultiStage var s MultiStage
s = append(s, NewWaitStage(wait)) s = append(s, NewWaitStage(wait))
s = append(s, NewDedupStage(&integrations[i], notificationLog, recv)) s = append(s, NewDedupStage(&integrations[i], notificationLog, recv))
s = append(s, NewRetryStage(integrations[i], name)) s = append(s, NewRetryStage(integrations[i], name, metrics))
s = append(s, NewSetNotifiesStage(notificationLog, recv)) s = append(s, NewSetNotifiesStage(notificationLog, recv))
fs = append(fs, s) fs = append(fs, s)
@ -594,13 +599,15 @@ func (n *DedupStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Al
type RetryStage struct { type RetryStage struct {
integration Integration integration Integration
groupName string groupName string
metrics *metrics
} }
// NewRetryStage returns a new instance of a RetryStage. // NewRetryStage returns a new instance of a RetryStage.
func NewRetryStage(i Integration, groupName string) *RetryStage { func NewRetryStage(i Integration, groupName string, metrics *metrics) *RetryStage {
return &RetryStage{ return &RetryStage{
integration: i, integration: i,
groupName: groupName, groupName: groupName,
metrics: metrics,
} }
} }
@ -653,10 +660,10 @@ func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Ale
case <-tick.C: case <-tick.C:
now := time.Now() now := time.Now()
retry, err := r.integration.Notify(ctx, sent...) retry, err := r.integration.Notify(ctx, sent...)
notificationLatencySeconds.WithLabelValues(r.integration.Name()).Observe(time.Since(now).Seconds()) r.metrics.notificationLatencySeconds.WithLabelValues(r.integration.Name()).Observe(time.Since(now).Seconds())
numNotifications.WithLabelValues(r.integration.Name()).Inc() r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc()
if err != nil { if err != nil {
numFailedNotifications.WithLabelValues(r.integration.Name()).Inc() r.metrics.numFailedNotifications.WithLabelValues(r.integration.Name()).Inc()
level.Debug(l).Log("msg", "Notify attempt failed", "attempt", i, "integration", r.integration.Name(), "receiver", r.groupName, "err", err) level.Debug(l).Log("msg", "Notify attempt failed", "attempt", i, "integration", r.integration.Name(), "receiver", r.groupName, "err", err)
if !retry { if !retry {
return ctx, alerts, fmt.Errorf("cancelling notify retry for %q due to unrecoverable error: %s", r.integration.Name(), err) return ctx, alerts, fmt.Errorf("cancelling notify retry for %q due to unrecoverable error: %s", r.integration.Name(), err)

View File

@ -389,6 +389,7 @@ func TestRetryStageWithError(t *testing.T) {
} }
r := RetryStage{ r := RetryStage{
integration: i, integration: i,
metrics: newMetrics(prometheus.NewRegistry()),
} }
alerts := []*types.Alert{ alerts := []*types.Alert{
@ -429,6 +430,7 @@ func TestRetryStageNoResolved(t *testing.T) {
} }
r := RetryStage{ r := RetryStage{
integration: i, integration: i,
metrics: newMetrics(prometheus.NewRegistry()),
} }
alerts := []*types.Alert{ alerts := []*types.Alert{
@ -482,6 +484,7 @@ func TestRetryStageSendResolved(t *testing.T) {
} }
r := RetryStage{ r := RetryStage{
integration: i, integration: i,
metrics: newMetrics(prometheus.NewRegistry()),
} }
alerts := []*types.Alert{ alerts := []*types.Alert{