dispatch: add metrics (#2113)

Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
Simon Pasquier 2019-11-26 09:04:56 +01:00 committed by GitHub
parent 21e99dcb63
commit 4f45457b9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 41 additions and 5 deletions

View File

@ -371,6 +371,7 @@ func run() int {
tmpl *template.Template
)
dispMetrics := dispatch.NewDispatcherMetrics(prometheus.DefaultRegisterer)
pipelineBuilder := notify.NewPipelineBuilder(prometheus.DefaultRegisterer)
configCoordinator := config.NewCoordinator(
*configFile,
@ -415,7 +416,7 @@ func run() int {
})
routes := dispatch.NewRoute(conf.Route, nil)
disp = dispatch.NewDispatcher(alerts, routes, pipeline, marker, timeoutFunc, logger)
disp = dispatch.NewDispatcher(alerts, routes, pipeline, marker, timeoutFunc, logger, dispMetrics)
walkRoute(routes, func(r *dispatch.Route) {
if r.RouteOpts.RepeatInterval > *retention {
level.Warn(log.With(logger, "component", "configuration")).Log(

View File

@ -22,6 +22,7 @@ import (
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/alertmanager/notify"
@ -30,12 +31,39 @@ import (
"github.com/prometheus/alertmanager/types"
)
// DispatcherMetrics represents metrics associated with a dispatcher.
type DispatcherMetrics struct {
// aggrGroups tracks the number of active aggregation groups: it is reset
// to zero when the dispatcher starts running, incremented when a new
// aggregation group is created and decremented when an empty group is
// removed.
aggrGroups prometheus.Gauge
// processingDuration records, in seconds, how long the dispatcher takes
// to match an incoming alert against the routes and process it.
processingDuration prometheus.Summary
}
// NewDispatcherMetrics returns a new DispatcherMetrics whose collectors are
// registered with r.
//
// The collectors are registered on the Registerer supplied by the caller
// rather than on the global default registry, so independent instances (for
// example one per test, each with its own prometheus.NewRegistry()) do not
// collide with a duplicate-registration panic. When r is nil the metrics are
// created but left unregistered.
//
// Registration uses MustRegister, so it panics if r already holds collectors
// with the same names.
func NewDispatcherMetrics(r prometheus.Registerer) *DispatcherMetrics {
	m := DispatcherMetrics{
		aggrGroups: prometheus.NewGauge(
			prometheus.GaugeOpts{
				Name: "alertmanager_dispatcher_aggregation_groups",
				Help: "Number of active aggregation groups",
			},
		),
		processingDuration: prometheus.NewSummary(
			prometheus.SummaryOpts{
				Name: "alertmanager_dispatcher_alert_processing_duration_seconds",
				Help: "Summary of latencies for the processing of alerts.",
			},
		),
	}
	// Guard against a nil interface: calling a method on it would panic.
	if r != nil {
		r.MustRegister(m.aggrGroups, m.processingDuration)
	}
	return &m
}
// Dispatcher sorts incoming alerts into aggregation groups and
// assigns the correct notifiers to each.
type Dispatcher struct {
route *Route
alerts provider.Alerts
stage notify.Stage
route *Route
alerts provider.Alerts
stage notify.Stage
metrics *DispatcherMetrics
marker types.Marker
timeout func(time.Duration) time.Duration
@ -58,6 +86,7 @@ func NewDispatcher(
mk types.Marker,
to func(time.Duration) time.Duration,
l log.Logger,
m *DispatcherMetrics,
) *Dispatcher {
disp := &Dispatcher{
alerts: ap,
@ -66,6 +95,7 @@ func NewDispatcher(
marker: mk,
timeout: to,
logger: log.With(l, "component", "dispatcher"),
metrics: m,
}
return disp
}
@ -76,6 +106,7 @@ func (d *Dispatcher) Run() {
d.mtx.Lock()
d.aggrGroups = map[*Route]map[model.Fingerprint]*aggrGroup{}
d.metrics.aggrGroups.Set(0)
d.mtx.Unlock()
d.ctx, d.cancel = context.WithCancel(context.Background())
@ -109,9 +140,11 @@ func (d *Dispatcher) run(it provider.AlertIterator) {
continue
}
now := time.Now()
for _, r := range d.route.Match(alert.Labels) {
d.processAlert(alert, r)
}
d.metrics.processingDuration.Observe(time.Since(now).Seconds())
case <-cleanup.C:
d.mtx.Lock()
@ -121,6 +154,7 @@ func (d *Dispatcher) run(it provider.AlertIterator) {
if ag.empty() {
ag.stop()
delete(groups, ag.fingerprint())
d.metrics.aggrGroups.Dec()
}
}
}
@ -252,6 +286,7 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *Route) {
if !ok {
ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger)
group[fp] = ag
d.metrics.aggrGroups.Inc()
go ag.run(func(ctx context.Context, alerts ...*types.Alert) bool {
_, _, err := d.stage.Exec(ctx, d.logger, alerts...)

View File

@ -372,7 +372,7 @@ route:
timeout := func(d time.Duration) time.Duration { return time.Duration(0) }
recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, logger)
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, logger, NewDispatcherMetrics(prometheus.NewRegistry()))
go dispatcher.Run()
defer dispatcher.Stop()