alertmanager/notify/notify.go

438 lines
12 KiB
Go
Raw Normal View History

2015-10-11 15:24:49 +00:00
// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2015-09-29 12:45:38 +00:00
package notify
import (
"fmt"
2015-09-29 13:12:31 +00:00
"sync"
"time"
2015-09-29 12:45:38 +00:00
"github.com/cenkalti/backoff"
2015-09-29 13:12:31 +00:00
"github.com/prometheus/common/log"
"github.com/prometheus/common/model"
2015-09-29 12:45:38 +00:00
"golang.org/x/net/context"
2015-09-29 13:12:31 +00:00
"github.com/prometheus/alertmanager/provider"
2015-09-29 12:45:38 +00:00
"github.com/prometheus/alertmanager/types"
)
2015-11-12 12:18:36 +00:00
// MinTimeout is the lower bound for the timeout applied to the context of
// a single call into a notification pipeline.
const MinTimeout time.Duration = 10 * time.Second
2015-11-12 12:18:36 +00:00
// notifyKey is a private key type for values stored in a context. Using a
// dedicated type prevents accidental collisions with keys of other packages.
type notifyKey int

// Context keys used by the With* setters and their matching getters.
const (
	// keyReceiver identifies the receiver name.
	keyReceiver notifyKey = iota
	// keyRepeatInterval identifies the repeat interval.
	keyRepeatInterval
	// keySendResolved identifies the send-resolved flag.
	keySendResolved
	// keyGroupLabels identifies the grouping label set.
	keyGroupLabels
	// keyGroupKey identifies the group key.
	keyGroupKey
	// keyNow identifies the "now" timestamp.
	keyNow
)
2015-11-12 12:18:36 +00:00
// WithReceiver populates a context with a receiver.
2015-11-10 12:47:04 +00:00
func WithReceiver(ctx context.Context, rcv string) context.Context {
return context.WithValue(ctx, keyReceiver, rcv)
}
2015-11-12 12:18:36 +00:00
// WithRepeatInterval returns a context derived from ctx that carries the
// repeat interval t.
func WithRepeatInterval(ctx context.Context, t time.Duration) context.Context {
	derived := context.WithValue(ctx, keyRepeatInterval, t)
	return derived
}
2015-11-12 12:18:36 +00:00
// WithSendResolved returns a context derived from ctx that carries the
// send-resolved flag b.
func WithSendResolved(ctx context.Context, b bool) context.Context {
	derived := context.WithValue(ctx, keySendResolved, b)
	return derived
}
2015-11-12 12:18:36 +00:00
// WithGroupKey returns a context derived from ctx that carries the group
// key fp.
func WithGroupKey(ctx context.Context, fp model.Fingerprint) context.Context {
	derived := context.WithValue(ctx, keyGroupKey, fp)
	return derived
}
2015-11-12 12:18:36 +00:00
// WithGroupLabels returns a context derived from ctx that carries the
// grouping label set lset.
func WithGroupLabels(ctx context.Context, lset model.LabelSet) context.Context {
	derived := context.WithValue(ctx, keyGroupLabels, lset)
	return derived
}
2015-11-12 12:18:36 +00:00
// WithNow returns a context derived from ctx that carries the timestamp t
// as its notion of "now".
func WithNow(ctx context.Context, t time.Time) context.Context {
	derived := context.WithValue(ctx, keyNow, t)
	return derived
}
2015-11-26 17:19:46 +00:00
// receiver returns the receiver found in ctx. If the context carries none,
// an error is logged and the value returned by Receiver is passed through
// unchanged.
func receiver(ctx context.Context) string {
	name, found := Receiver(ctx)
	if found {
		return name
	}
	log.Error("missing receiver")
	return name
}
2015-11-12 12:18:36 +00:00
// Receiver extracts a receiver from the context. Iff none exists, the
// second argument is false.
2015-11-10 12:47:04 +00:00
func Receiver(ctx context.Context) (string, bool) {
v, ok := ctx.Value(keyReceiver).(string)
return v, ok
}
2015-11-12 12:18:36 +00:00
// RepeatInterval looks up the repeat interval stored in ctx. The boolean
// result is false if no time.Duration is stored under the repeat interval key.
func RepeatInterval(ctx context.Context) (time.Duration, bool) {
	if interval, ok := ctx.Value(keyRepeatInterval).(time.Duration); ok {
		return interval, true
	}
	return 0, false
}
2015-11-12 12:18:36 +00:00
// SendResolved looks up the send-resolved flag stored in ctx. The first
// result is the flag itself; the second is false if no bool is stored under
// the send-resolved key.
func SendResolved(ctx context.Context) (bool, bool) {
	if flag, ok := ctx.Value(keySendResolved).(bool); ok {
		return flag, true
	}
	return false, false
}
2015-11-12 12:18:36 +00:00
// GroupKey looks up the group key stored in ctx. The boolean result is false
// if no model.Fingerprint is stored under the group key.
func GroupKey(ctx context.Context) (model.Fingerprint, bool) {
	stored := ctx.Value(keyGroupKey)
	fp, found := stored.(model.Fingerprint)
	return fp, found
}
2015-11-25 14:49:26 +00:00
// groupLabels returns the grouping labels found in ctx. If the context
// carries none, an error is logged and the value returned by GroupLabels is
// passed through unchanged.
func groupLabels(ctx context.Context) model.LabelSet {
	lset, found := GroupLabels(ctx)
	if found {
		return lset
	}
	log.Error("missing group labels")
	return lset
}
2015-11-12 12:18:36 +00:00
// GroupLabels looks up the grouping label set stored in ctx. The boolean
// result is false if no model.LabelSet is stored under the group labels key.
func GroupLabels(ctx context.Context) (model.LabelSet, bool) {
	stored := ctx.Value(keyGroupLabels)
	lset, found := stored.(model.LabelSet)
	return lset, found
}
2015-11-12 12:18:36 +00:00
// Now looks up the "now" timestamp stored in ctx. The boolean result is
// false if no time.Time is stored under the now key.
func Now(ctx context.Context) (time.Time, bool) {
	if ts, ok := ctx.Value(keyNow).(time.Time); ok {
		return ts, true
	}
	return time.Time{}, false
}
2015-11-12 12:18:36 +00:00
// A Notifier is a type which notifies about alerts under constraints of the
// given context.
2015-09-29 13:12:31 +00:00
type Notifier interface {
Notify(context.Context, ...*types.Alert) error
}
2015-11-12 12:18:36 +00:00
// Fanout sends notifications through all notifiers it holds at once.
2015-10-11 13:37:21 +00:00
type Fanout map[string]Notifier
2015-09-29 13:12:31 +00:00
2015-11-12 12:18:36 +00:00
// Notify attempts to notify all Notifiers concurrently. It returns a types.MultiError
// if any of them fails.
2015-10-11 14:54:31 +00:00
func (ns Fanout) Notify(ctx context.Context, alerts ...*types.Alert) error {
2015-10-11 10:40:43 +00:00
var (
wg sync.WaitGroup
me types.MultiError
)
2015-09-29 13:12:31 +00:00
wg.Add(len(ns))
2015-11-10 12:47:04 +00:00
receiver, ok := Receiver(ctx)
2015-10-11 13:37:21 +00:00
if !ok {
2015-11-10 12:47:04 +00:00
return fmt.Errorf("receiver missing")
2015-10-11 13:37:21 +00:00
}
2015-10-11 14:54:31 +00:00
for suffix, n := range ns {
2015-11-10 12:47:04 +00:00
// Suffix the receiver with the unique key for the fanout.
foCtx := WithReceiver(ctx, fmt.Sprintf("%s/%s", receiver, suffix))
2015-10-11 13:37:21 +00:00
2015-09-29 13:12:31 +00:00
go func(n Notifier) {
2015-10-11 13:37:21 +00:00
if err := n.Notify(foCtx, alerts...); err != nil {
me.Add(err)
2015-09-29 13:12:31 +00:00
log.Errorf("Error on notify: %s", err)
}
wg.Done()
}(n)
}
wg.Wait()
if me.Len() > 0 {
return &me
2015-10-11 14:54:31 +00:00
}
return nil
2015-09-29 12:45:38 +00:00
}
2015-11-12 12:18:36 +00:00
// RetryNotifier accepts another notifier and retries notifying
// on error with exponential backoff.
type RetryNotifier struct {
2015-10-11 14:54:31 +00:00
notifier Notifier
}
2015-11-12 12:18:36 +00:00
// Retry wraps the given notifier in a RetryNotifier.
2015-10-11 14:54:31 +00:00
func Retry(n Notifier) *RetryNotifier {
return &RetryNotifier{notifier: n}
}
2015-11-12 12:18:36 +00:00
// Notify calls the underlying notifier with exponential backoff until it succeeds.
// It aborts if the context is canceled or timed out.
func (n *RetryNotifier) Notify(ctx context.Context, alerts ...*types.Alert) error {
var (
i = 0
b = backoff.NewExponentialBackOff()
tick = backoff.NewTicker(b)
)
defer tick.Stop()
for {
i++
select {
case <-tick.C:
2015-10-11 14:54:31 +00:00
if err := n.notifier.Notify(ctx, alerts...); err != nil {
2015-10-09 08:48:25 +00:00
log.Warnf("Notify attempt %d failed: %s", i, err)
} else {
return nil
}
case <-ctx.Done():
return ctx.Err()
}
}
}
2015-11-12 12:18:36 +00:00
// DedupingNotifier filters and forwards alerts to another notifier.
// Filtering happens based on a provider of NotifyInfos.
// On successful notification new NotifyInfos are set.
2015-09-29 13:12:31 +00:00
type DedupingNotifier struct {
notifies provider.Notifies
notifier Notifier
2015-09-29 12:45:38 +00:00
}
2015-11-12 12:18:36 +00:00
// Dedup wraps a Notifier in a DedupingNotifier that runs against the given NotifyInfo provider.
2015-10-11 14:54:31 +00:00
func Dedup(notifies provider.Notifies, n Notifier) *DedupingNotifier {
return &DedupingNotifier{notifies: notifies, notifier: n}
2015-09-29 12:45:38 +00:00
}
2015-11-12 12:18:36 +00:00
// Notify implements the Notifier interface.
2015-09-29 13:12:31 +00:00
func (n *DedupingNotifier) Notify(ctx context.Context, alerts ...*types.Alert) error {
2015-11-10 12:47:04 +00:00
name, ok := Receiver(ctx)
2015-09-29 13:12:31 +00:00
if !ok {
return fmt.Errorf("notifier name missing")
}
repeatInterval, ok := RepeatInterval(ctx)
2015-09-29 13:12:31 +00:00
if !ok {
return fmt.Errorf("repeat interval missing")
2015-09-29 12:45:38 +00:00
}
2015-09-29 13:12:31 +00:00
sendResolved, ok := SendResolved(ctx)
2015-09-29 13:12:31 +00:00
if !ok {
return fmt.Errorf("send resolved missing")
}
now, ok := Now(ctx)
if !ok {
return fmt.Errorf("now time missing")
}
2015-09-29 13:12:31 +00:00
var fps []model.Fingerprint
2015-09-29 12:45:38 +00:00
for _, a := range alerts {
2015-09-29 13:12:31 +00:00
fps = append(fps, a.Fingerprint())
2015-09-29 12:45:38 +00:00
}
2015-09-29 13:12:31 +00:00
notifies, err := n.notifies.Get(name, fps...)
if err != nil {
2015-09-29 12:45:38 +00:00
return err
}
var (
doResend bool
resendQueue []*types.Alert
filtered []*types.Alert
)
2015-09-29 13:12:31 +00:00
for i, a := range alerts {
last := notifies[i]
if last != nil {
if a.Resolved() {
if !sendResolved || last.Resolved {
2015-09-29 13:12:31 +00:00
continue
}
} else if !last.Resolved {
// Do not send again if last was delivered unless
// the repeat interval has already passed.
if !now.After(last.Timestamp.Add(repeatInterval)) {
// To not repeat initial batch fragmentation after the repeat interval
// has passed, store them and send them anyway if on of the other
// alerts has already passed the repeat interval.
// This way we unify batches again.
resendQueue = append(resendQueue, a)
continue
} else {
doResend = true
2015-09-29 13:12:31 +00:00
}
}
} else if a.Resolved() {
// If the alert is resolved but we never notified about it firing,
// there is nothing to do.
2015-10-02 13:58:37 +00:00
continue
2015-09-29 13:12:31 +00:00
}
filtered = append(filtered, a)
}
// As we are resending an alert anyway, resend all of them even if their
// repeat interval has not yet passed.
if doResend {
filtered = append(filtered, resendQueue...)
}
// The deduping notifier is the last one before actually sending notifications.
// Thus, this is the place where we abort if after all filtering, nothing is left.
if len(filtered) == 0 {
return nil
}
var newNotifies []*types.NotifyInfo
2015-09-29 13:12:31 +00:00
for _, a := range filtered {
newNotifies = append(newNotifies, &types.NotifyInfo{
2015-09-29 13:12:31 +00:00
Alert: a.Fingerprint(),
2015-11-10 12:47:04 +00:00
Receiver: name,
2015-09-29 13:12:31 +00:00
Resolved: a.Resolved(),
Timestamp: now,
})
}
if err := n.notifier.Notify(ctx, filtered...); err != nil {
2015-09-29 12:45:38 +00:00
return err
}
2015-10-06 18:40:52 +00:00
return n.notifies.Set(newNotifies...)
2015-09-29 13:12:31 +00:00
}
2015-11-12 12:18:36 +00:00
// Router dispatches the alerts to one of a set of
2015-09-29 13:12:31 +00:00
// named notifiers based on the name value provided in the context.
2015-10-11 14:54:31 +00:00
type Router map[string]Notifier
2015-09-29 13:12:31 +00:00
2015-11-12 12:18:36 +00:00
// Notify implements the Notifier interface.
2015-10-11 14:54:31 +00:00
func (rs Router) Notify(ctx context.Context, alerts ...*types.Alert) error {
2015-11-10 12:47:04 +00:00
receiver, ok := Receiver(ctx)
2015-09-29 13:12:31 +00:00
if !ok {
return fmt.Errorf("notifier name missing")
}
2015-11-10 12:47:04 +00:00
notifier, ok := rs[receiver]
2015-09-29 13:12:31 +00:00
if !ok {
2015-11-10 12:47:04 +00:00
return fmt.Errorf("notifier %q does not exist", receiver)
2015-09-29 13:12:31 +00:00
}
return notifier.Notify(ctx, alerts...)
}
2015-12-03 16:27:36 +00:00
// SilenceNotifier filters alerts through a silence muter before
// passing it on to the next Notifier
type SilenceNotifier struct {
2015-10-11 14:54:31 +00:00
notifier Notifier
2015-12-03 16:27:36 +00:00
muter types.Muter
marker types.Marker
2015-10-11 14:54:31 +00:00
}
2015-12-03 16:27:36 +00:00
// Silence returns a new SilenceNotifier.
func Silence(m types.Muter, n Notifier, mk types.Marker) *SilenceNotifier {
return &SilenceNotifier{
notifier: n,
muter: m,
marker: mk,
}
2015-09-29 13:12:31 +00:00
}
2015-12-03 16:27:36 +00:00
// Notify implements the Notifier interface. Alerts muted by the silence
// muter are dropped; all others are forwarded to the next notifier with
// their WasSilenced field set from the marker.
func (n *SilenceNotifier) Notify(ctx context.Context, alerts ...*types.Alert) error {
	var passed []*types.Alert

	for i := range alerts {
		alert := alerts[i]

		// Read the marker before consulting the muter.
		_, wasSilenced := n.marker.Silenced(alert.Fingerprint())
		// TODO(fabxc): increment total alerts counter.

		// Do not send the alert if the silencer mutes it.
		if n.muter.Mutes(alert.Labels) {
			// TODO(fabxc): increment muted alerts counter.
			continue
		}
		passed = append(passed, alert)
		// Store whether a previously silenced alert is firing again.
		alert.WasSilenced = wasSilenced
	}

	return n.notifier.Notify(ctx, passed...)
}
// InhibitNotifier drops alerts that are muted by an inhibition muter and
// passes the remaining ones on to the next Notifier.
type InhibitNotifier struct {
	// notifier is the next notifier in the pipeline.
	notifier Notifier
	// muter decides whether an alert's label set is muted.
	muter types.Muter
	// marker is queried for the inhibited status of an alert.
	marker types.Marker
}
// Inhibit returns a new InhibitNotifier that filters alerts through m, using
// mk to look up their inhibited status, before handing them to n.
func Inhibit(m types.Muter, n Notifier, mk types.Marker) *InhibitNotifier {
	in := new(InhibitNotifier)
	in.notifier = n
	in.muter = m
	in.marker = mk
	return in
}
// Notify implements the Notifier interface.
func (n *InhibitNotifier) Notify(ctx context.Context, alerts ...*types.Alert) error {
2015-09-29 13:12:31 +00:00
var filtered []*types.Alert
for _, a := range alerts {
2015-12-03 16:27:36 +00:00
ok := n.marker.Inhibited(a.Fingerprint())
2015-09-29 13:12:31 +00:00
// TODO(fabxc): increment total alerts counter.
// Do not send the alert if the silencer mutes it.
2015-12-03 16:27:36 +00:00
if !n.muter.Mutes(a.Labels) {
2015-09-29 13:12:31 +00:00
// TODO(fabxc): increment muted alerts counter.
filtered = append(filtered, a)
2015-12-03 16:27:36 +00:00
// Store whether a previously inhibited alert is firing again.
a.WasInhibited = ok
2015-09-29 13:12:31 +00:00
}
}
2015-10-11 14:54:31 +00:00
return n.notifier.Notify(ctx, filtered...)
2015-09-29 13:12:31 +00:00
}
2015-11-12 12:18:36 +00:00
// LogNotifier logs the alerts to be notified about. It forwards to another Notifier
// afterwards, if any is provided.
2015-09-29 13:12:31 +00:00
type LogNotifier struct {
2015-10-11 14:54:31 +00:00
log log.Logger
notifier Notifier
}
2015-11-12 12:18:36 +00:00
// Log wraps a Notifier in a LogNotifier with the given Logger.
2015-10-11 14:54:31 +00:00
func Log(n Notifier, log log.Logger) *LogNotifier {
return &LogNotifier{log: log, notifier: n}
2015-09-29 13:12:31 +00:00
}
2015-11-12 12:18:36 +00:00
// Notify implements the Notifier interface.
2015-10-11 14:54:31 +00:00
func (n *LogNotifier) Notify(ctx context.Context, alerts ...*types.Alert) error {
n.log.Debugf("notify %v", alerts)
2015-09-29 13:12:31 +00:00
2015-10-11 14:54:31 +00:00
if n.notifier != nil {
return n.notifier.Notify(ctx, alerts...)
2015-09-29 13:12:31 +00:00
}
2015-09-29 12:45:38 +00:00
return nil
}