filter out silenced alerts and save notification state
This commit is contained in:
parent
1ffa8b78d3
commit
7a510669cb
|
@ -3,8 +3,6 @@ package crdt
|
|||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
|
||||
"github.com/prometheus/log"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -54,8 +52,6 @@ func (lww *LWW) Add(key string, score uint64, val interface{}) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
log.Infof("adding %s", val)
|
||||
|
||||
e.del = 0
|
||||
e.add = score
|
||||
e.val = val
|
||||
|
|
|
@ -158,7 +158,7 @@ func (api *API) delSilence(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
|
||||
func (api *API) listSilences(w http.ResponseWriter, r *http.Request) {
|
||||
sils, err := api.state.Silence().GetAll()
|
||||
sils, err := api.state.Silence().List()
|
||||
if err != nil {
|
||||
respondError(w, apiError{
|
||||
typ: errorBadData,
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
"github.com/prometheus/log"
|
||||
)
|
||||
|
||||
const ResolveTimeout = 15 * time.Second
|
||||
const ResolveTimeout = 35 * time.Second
|
||||
|
||||
// Dispatcher dispatches alerts. It is based on the alert's data
|
||||
// rather than the time they arrive. Thus it can recover its state
|
||||
|
@ -37,7 +37,47 @@ func NewDispatcher(state State, notifiers []Notifier) *Dispatcher {
|
|||
return disp
|
||||
}
|
||||
|
||||
func (d *Dispatcher) filter(alerts ...*Alert) ([]*Alert, error) {
|
||||
|
||||
silences, err := d.state.Silence().List()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var sentAlerts []*Alert
|
||||
|
||||
for _, alert := range alerts {
|
||||
add := true
|
||||
// None of the existing silences must match the alert.
|
||||
for _, sil := range silences {
|
||||
if sil.Match(alert.Labels) {
|
||||
add = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if !add {
|
||||
continue
|
||||
}
|
||||
|
||||
// Filter out alerts that have already been sent.
|
||||
ni, err := d.state.Notify().Get(alert.Fingerprint())
|
||||
// Always try to send on error as the safest option.
|
||||
if err == nil && ni.LastSent.Before(alert.ResolvedAt) && ni.LastResolved == alert.Resolved() {
|
||||
continue
|
||||
}
|
||||
|
||||
sentAlerts = append(sentAlerts, alert)
|
||||
}
|
||||
|
||||
return sentAlerts, nil
|
||||
}
|
||||
|
||||
func (d *Dispatcher) notify(name string, alerts ...*Alert) error {
|
||||
alerts, err := d.filter(alerts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(alerts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
@ -50,13 +90,25 @@ func (d *Dispatcher) notify(name string, alerts ...*Alert) error {
|
|||
return fmt.Errorf("notifier %q does not exist", name)
|
||||
}
|
||||
|
||||
return notifier.Send(alerts...)
|
||||
if err = notifier.Send(alerts...); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, alert := range alerts {
|
||||
_ = d.state.Notify().Set(alert.Fingerprint(), &NotifyInfo{
|
||||
LastSent: time.Now(),
|
||||
LastResolved: alert.Resolved(),
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Dispatcher) Run() {
|
||||
|
||||
updates := d.state.Alert().Iter()
|
||||
cleanup := time.Tick(30 * time.Second)
|
||||
var (
|
||||
updates = d.state.Alert().Iter()
|
||||
cleanup = time.Tick(30 * time.Second)
|
||||
)
|
||||
|
||||
for {
|
||||
select {
|
||||
|
@ -69,21 +121,6 @@ func (d *Dispatcher) Run() {
|
|||
}
|
||||
}
|
||||
|
||||
// now := time.Now()
|
||||
|
||||
// list, err := d.state.Alert().GetAll()
|
||||
// if err != nil {
|
||||
// log.Error(err)
|
||||
// }
|
||||
|
||||
// for _, a := range list {
|
||||
// if a.Resolved() && a.ResolvedAt.Before(now.Sub(ResolveTimeout)) {
|
||||
// if err := d.state.Alert().Del(a.Fingerprint()); err != nil {
|
||||
// log.Errorf("error cleaning resolved alerts: %s", err)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
case alert := <-updates:
|
||||
|
||||
conf, err := d.state.Config().Get()
|
||||
|
@ -99,6 +136,7 @@ func (d *Dispatcher) Run() {
|
|||
if alert.ResolvedAt.IsZero() {
|
||||
alert.ResolvedAt = alert.CreatedAt.Add(ResolveTimeout)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -123,6 +161,31 @@ func (d *Dispatcher) processAlert(alert *Alert, opts *RouteOpts) {
|
|||
ag.insert(alert)
|
||||
}
|
||||
|
||||
type Silence struct {
|
||||
Matchers Matchers
|
||||
|
||||
// The numeric ID of the silence.
|
||||
ID string
|
||||
|
||||
// Name/email of the silence creator.
|
||||
CreatedBy string
|
||||
// When the silence was first created and when it ends.
|
||||
CreatedAt, EndsAt time.Time
|
||||
|
||||
// Additional comment about the silence.
|
||||
Comment string
|
||||
}
|
||||
|
||||
func (sil *Silence) Match(lset model.LabelSet) bool {
|
||||
now := time.Now()
|
||||
|
||||
if now.Before(sil.CreatedAt) || now.After(sil.EndsAt) {
|
||||
return false
|
||||
}
|
||||
|
||||
return sil.Matchers.Match(lset)
|
||||
}
|
||||
|
||||
// Alert models an action triggered by Prometheus.
|
||||
type Alert struct {
|
||||
// Label value pairs for purpose of aggregation, matching, and disposition
|
||||
|
|
|
@ -82,8 +82,8 @@ func NewRegexMatcher(name model.LabelName, value string) (*Matcher, error) {
|
|||
|
||||
type Matchers []*Matcher
|
||||
|
||||
// MatchAll checks whether all matchers are fulfilled against the given label set.
|
||||
func (ms Matchers) MatchAll(lset model.LabelSet) bool {
|
||||
// Match checks whether all matchers are fulfilled against the given label set.
|
||||
func (ms Matchers) Match(lset model.LabelSet) bool {
|
||||
for _, m := range ms {
|
||||
if !m.Match(lset) {
|
||||
return false
|
||||
|
|
|
@ -41,7 +41,7 @@ type Route struct {
|
|||
// Match does a depth-first left-to-right search through the route tree
|
||||
// and returns the flattened configuration for the reached node.
|
||||
func (r *Route) Match(lset model.LabelSet) []*RouteOpts {
|
||||
if !r.Matchers.MatchAll(lset) {
|
||||
if !r.Matchers.Match(lset) {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
// Copyright 2015 Prometheus Team
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package manager
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
type Silence struct {
|
||||
// The numeric ID of the silence.
|
||||
ID string
|
||||
|
||||
// Name/email of the silence creator.
|
||||
CreatedBy string
|
||||
// When the silence was first created (Unix timestamp).
|
||||
CreatedAt, EndsAt time.Time
|
||||
|
||||
// Additional comment about the silence.
|
||||
Comment string
|
||||
|
||||
// Matchers that determine which alerts are silenced.
|
||||
Matchers Matchers
|
||||
|
||||
// Timer used to trigger the deletion of the Silence after its expiry
|
||||
// time.
|
||||
expiryTimer *time.Timer
|
||||
}
|
|
@ -16,7 +16,7 @@ import (
|
|||
type State interface {
|
||||
Silence() SilenceState
|
||||
Config() ConfigState
|
||||
// Notify() NotifyState
|
||||
Notify() NotifyState
|
||||
Alert() AlertState
|
||||
}
|
||||
|
||||
|
@ -34,11 +34,13 @@ type ConfigState interface {
|
|||
}
|
||||
|
||||
type NotifyState interface {
|
||||
Get(model.Fingerprint) (*NotifyInfo, error)
|
||||
Set(model.Fingerprint, *NotifyInfo) error
|
||||
}
|
||||
|
||||
type SilenceState interface {
|
||||
// List returns a list of all silences.
|
||||
GetAll() ([]*Silence, error)
|
||||
List() ([]*Silence, error)
|
||||
|
||||
// Set sets the given silence.
|
||||
Set(*Silence) error
|
||||
|
@ -51,6 +53,7 @@ type simpleState struct {
|
|||
silences *memSilences
|
||||
alerts *crdtAlerts
|
||||
config *memConfig
|
||||
notify *memNotify
|
||||
}
|
||||
|
||||
func NewSimpleState() State {
|
||||
|
@ -65,6 +68,9 @@ func NewSimpleState() State {
|
|||
// updates: make(chan *Alert, 100),
|
||||
// },
|
||||
config: &memConfig{},
|
||||
notify: &memNotify{
|
||||
m: map[model.Fingerprint]*NotifyInfo{},
|
||||
},
|
||||
}
|
||||
|
||||
go state.alerts.run()
|
||||
|
@ -84,6 +90,31 @@ func (s *simpleState) Config() ConfigState {
|
|||
return s.config
|
||||
}
|
||||
|
||||
func (s *simpleState) Notify() NotifyState {
|
||||
return s.notify
|
||||
}
|
||||
|
||||
type NotifyInfo struct {
|
||||
LastSent time.Time
|
||||
LastResolved bool
|
||||
}
|
||||
|
||||
type memNotify struct {
|
||||
m map[model.Fingerprint]*NotifyInfo
|
||||
}
|
||||
|
||||
func (s *memNotify) Get(fp model.Fingerprint) (*NotifyInfo, error) {
|
||||
if info, ok := s.m[fp]; ok {
|
||||
return info, nil
|
||||
}
|
||||
return nil, fmt.Errorf("notify info for %s not found", fp)
|
||||
}
|
||||
|
||||
func (s *memNotify) Set(fp model.Fingerprint, info *NotifyInfo) error {
|
||||
s.m[fp] = info
|
||||
return nil
|
||||
}
|
||||
|
||||
type memConfig struct {
|
||||
config *Config
|
||||
mtx sync.RWMutex
|
||||
|
@ -324,7 +355,7 @@ func (s *memSilences) Del(sid string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *memSilences) GetAll() ([]*Silence, error) {
|
||||
func (s *memSilences) List() ([]*Silence, error) {
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
|
||||
|
|
Loading…
Reference in New Issue