alertmanager/inhibit/inhibit.go
beorn7 22db73fbf7 Modify the self-inhibition prevention semantics
This has been discussed in #666 (issue of hell...).

As concluded there, the cleanest semantics is most likely the
following: "An alert that matches both target and source side cannot
inhibit alerts for which the same is true." The two open questions
were:
1. How difficult is the implementation?
2. Is it needed?

This relatively simple commit proves that the answer to (1) is: Not
very difficult. (This also includes a performance-improving
simplification, which would have been possible without a change of
semantics.)

The answer to (2) is twofold:

For one, the original use case in #666 wasn't solved by our interim
solution. What we solved is the case where the self-inhibition is
triggered by a wide target match, i.e. I have a specific alert that
should inhibit a whole group of target alerts without inhibiting
itself. What we did _not_ solve is the inverted case: Self-inhibition
by a wide source match, i.e. an alert that should only fire if none of
a whole group of source alerts fires. I mean, we "fixed" it as in, the
target alert will never be inhibited, but @lmb in #666 wanted the
alert to be inhibited _sometimes_ (just not _always_).

The other part is that I think that the asymmetry in our interim
solution will at some point haunt us. Thus, I really would like to get
this change in before we do a 1.0 release.

In practice, I expect this to be only relevant in very rare cases. But
those cases will be the most difficult to reason about, and I claim that
the solution in this commit is matching what humans intuitively
expect.

Signed-off-by: beorn7 <beorn@soundcloud.com>
2019-02-25 16:10:08 +01:00

221 lines
5.9 KiB
Go

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package inhibit
import (
"context"
"sync"
"time"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/oklog/oklog/pkg/group"
"github.com/prometheus/common/model"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/store"
"github.com/prometheus/alertmanager/types"
)
// An Inhibitor determines whether a given label set is muted
// based on the currently active alerts and a set of inhibition rules.
type Inhibitor struct {
// alerts is the provider the background loop subscribes to for alert updates.
alerts provider.Alerts
// rules holds one InhibitRule per configured inhibit rule.
rules []*InhibitRule
// marker receives the inhibition state decided by Mutes.
marker types.Marker
// logger is used to report errors from the background processing.
logger log.Logger
// mtx guards cancel, which Run writes and Stop reads.
mtx sync.RWMutex
// cancel cancels the context created by Run; it is nil until Run has been
// called.
cancel func()
}
// NewInhibitor returns a new Inhibitor that reads alerts from ap, reports
// inhibition state to mk and logs to logger. Every entry of rs is converted
// into an InhibitRule, preserving the order of rs.
func NewInhibitor(ap provider.Alerts, rs []*config.InhibitRule, mk types.Marker, logger log.Logger) *Inhibitor {
	// Start from a nil slice so that an empty configuration leaves rules nil.
	var rules []*InhibitRule
	for i := range rs {
		rules = append(rules, NewInhibitRule(rs[i]))
	}

	return &Inhibitor{
		alerts: ap,
		rules:  rules,
		marker: mk,
		logger: logger,
	}
}
// run subscribes to the alert provider and, for every alert received, stores
// it in the source cache of each rule whose source matchers match the alert's
// labels. It returns when ctx is done or when the subscription's channel has
// been closed; the subscription is closed on return.
func (ih *Inhibitor) run(ctx context.Context) {
	it := ih.alerts.Subscribe()
	defer it.Close()

	for {
		select {
		case <-ctx.Done():
			return
		case a, ok := <-it.Next():
			if !ok {
				// A receive on a closed channel succeeds immediately with the
				// zero value. Carrying on would access the labels of an
				// invalid alert and turn this loop into a busy spin, so
				// report any pending iterator error and stop.
				if err := it.Err(); err != nil {
					level.Error(ih.logger).Log("msg", "Error iterating alerts", "err", err)
				}
				return
			}
			if err := it.Err(); err != nil {
				level.Error(ih.logger).Log("msg", "Error iterating alerts", "err", err)
				continue
			}
			// Update the inhibition rules' cache.
			for _, r := range ih.rules {
				if !r.SourceMatchers.Match(a.Labels) {
					continue
				}
				// A failure to cache one alert is logged but must not stop
				// processing for the remaining rules.
				if err := r.scache.Set(a); err != nil {
					level.Error(ih.logger).Log("msg", "error on set alert", "err", err)
				}
			}
		}
	}
}
// Run the Inhibitor's background processing. It stores a cancel function for
// Stop, starts the source cache of every rule and then executes the alert
// subscription loop through a group.Group, logging a warning if the group
// reports an error.
func (ih *Inhibitor) Run() {
	ctx, cancel := context.WithCancel(context.Background())

	// Publish the cancel function under the write lock so Stop can read it.
	ih.mtx.Lock()
	ih.cancel = cancel
	ih.mtx.Unlock()

	// The caches and the subscription loop share a child context, so they are
	// torn down together by either Stop or the group's interrupt function.
	runCtx, runCancel := context.WithCancel(ctx)
	for i := range ih.rules {
		ih.rules[i].scache.Run(runCtx)
	}

	var g group.Group
	g.Add(
		func() error {
			ih.run(runCtx)
			return nil
		},
		func(error) {
			runCancel()
		},
	)

	err := g.Run()
	if err == nil {
		return
	}
	level.Warn(ih.logger).Log("msg", "error running inhibitor", "err", err)
}
// Stop the Inhibitor's background processing. It is safe to call on a nil
// Inhibitor and before Run has been called; in both cases it does nothing.
func (ih *Inhibitor) Stop() {
	if ih == nil {
		return
	}

	// Take a copy of the cancel function under the read lock, then invoke it.
	ih.mtx.RLock()
	stop := ih.cancel
	ih.mtx.RUnlock()

	if stop == nil {
		return
	}
	stop()
}
// Mutes returns true iff the given label set is muted. The outcome is also
// reported to the marker: with the fingerprint of the inhibiting alert when
// the label set is muted, and without any inhibiting fingerprint otherwise.
func (ih *Inhibitor) Mutes(lset model.LabelSet) bool {
	fp := lset.Fingerprint()

	for _, rule := range ih.rules {
		// Only rules whose target side matches can inhibit this label set.
		if rule.TargetMatchers.Match(lset) {
			// If the source side matches as well, inhibiting alerts for which
			// the same is true have to be excluded.
			matchesSource := rule.SourceMatchers.Match(lset)
			inhibitedByFP, found := rule.hasEqual(lset, matchesSource)
			if found {
				ih.marker.SetInhibited(fp, inhibitedByFP.String())
				return true
			}
		}
	}

	ih.marker.SetInhibited(fp)
	return false
}
// An InhibitRule specifies that a class of (source) alerts should inhibit
// notifications for another class of (target) alerts if all specified matching
// labels are equal between the two alerts. This may be used to inhibit alerts
// from sending notifications if their meaning is logically a subset of a
// higher-level alert.
type InhibitRule struct {
// The set of Filters which define the group of source alerts (which inhibit
// the target alerts).
SourceMatchers types.Matchers
// The set of Filters which define the group of target alerts (which are
// inhibited by the source alerts).
TargetMatchers types.Matchers
// A set of label names whose label values need to be identical in source and
// target alerts in order for the inhibition to take effect. With an empty
// set, hasEqual places no label constraint on the source alert.
Equal map[model.LabelName]struct{}
// Cache of alerts matching source labels. It is filled by the Inhibitor's
// background loop and read by hasEqual.
scache *store.Alerts
}
// NewInhibitRule returns a new InhibitRule based on a configuration definition.
// Equality and regular-expression matchers from the configuration are merged
// into one matcher list per side, and the Equal label names are turned into a
// set. The source cache is created with 15 minutes passed to store.NewAlerts
// (presumably its garbage-collection interval; confirm against the store
// package).
func NewInhibitRule(cr *config.InhibitRule) *InhibitRule {
	rule := &InhibitRule{
		Equal:  make(map[model.LabelName]struct{}, len(cr.Equal)),
		scache: store.NewAlerts(15 * time.Minute),
	}

	// Source side: plain matchers first, then regex matchers.
	for name, value := range cr.SourceMatch {
		rule.SourceMatchers = append(rule.SourceMatchers, types.NewMatcher(model.LabelName(name), value))
	}
	for name, re := range cr.SourceMatchRE {
		rule.SourceMatchers = append(rule.SourceMatchers, types.NewRegexMatcher(model.LabelName(name), re.Regexp))
	}

	// Target side: plain matchers first, then regex matchers.
	for name, value := range cr.TargetMatch {
		rule.TargetMatchers = append(rule.TargetMatchers, types.NewMatcher(model.LabelName(name), value))
	}
	for name, re := range cr.TargetMatchRE {
		rule.TargetMatchers = append(rule.TargetMatchers, types.NewRegexMatcher(model.LabelName(name), re.Regexp))
	}

	for _, name := range cr.Equal {
		rule.Equal[name] = struct{}{}
	}

	return rule
}
// hasEqual checks whether the source cache contains alerts matching the equal
// labels for the given label set. If excludeTwoSidedMatch is true, alerts that
// match both the source and the target side of the rule are disregarded.
// It returns the fingerprint of the first qualifying alert and true, or a zero
// fingerprint and false if there is none.
func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool) (model.Fingerprint, bool) {
	for candidate := range r.scache.List() {
		// The cache might be stale and contain resolved alerts.
		if candidate.Resolved() {
			continue
		}

		// Every label named in Equal must carry the same value on both sides.
		sameLabels := true
		for name := range r.Equal {
			if candidate.Labels[name] != lset[name] {
				sameLabels = false
				break
			}
		}
		if !sameLabels {
			continue
		}

		// Skip source alerts that are themselves targets of this rule when
		// the caller asked for two-sided matches to be excluded.
		if excludeTwoSidedMatch && r.TargetMatchers.Match(candidate.Labels) {
			continue
		}

		return candidate.Fingerprint(), true
	}

	return model.Fingerprint(0), false
}