2015-07-01 15:56:53 +00:00
|
|
|
package manager
|
2015-06-30 12:29:30 +00:00
|
|
|
|
|
|
|
import (
|
2015-07-10 16:27:17 +00:00
|
|
|
"encoding/json"
|
2015-07-01 11:17:08 +00:00
|
|
|
"fmt"
|
2015-07-10 16:27:17 +00:00
|
|
|
"path/filepath"
|
2015-07-04 11:02:49 +00:00
|
|
|
"sort"
|
2015-07-01 11:17:08 +00:00
|
|
|
"sync"
|
2015-07-04 12:59:52 +00:00
|
|
|
"time"
|
2015-07-01 11:17:08 +00:00
|
|
|
|
2015-06-30 12:29:30 +00:00
|
|
|
"github.com/prometheus/common/model"
|
2015-07-04 12:59:52 +00:00
|
|
|
"github.com/prometheus/log"
|
2015-07-07 07:47:09 +00:00
|
|
|
|
|
|
|
"github.com/prometheus/alertmanager/crdt"
|
2015-06-30 12:29:30 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// A State serves the Alertmanager's internal state about active silences.
|
|
|
|
type State interface {
|
2015-07-01 11:17:08 +00:00
|
|
|
Silence() SilenceState
|
2015-07-02 16:38:05 +00:00
|
|
|
Config() ConfigState
|
2015-07-09 13:01:38 +00:00
|
|
|
Notify() NotifyState
|
2015-07-01 16:36:37 +00:00
|
|
|
Alert() AlertState
|
2015-06-30 12:29:30 +00:00
|
|
|
}
|
|
|
|
|
2015-07-01 16:36:37 +00:00
|
|
|
type AlertState interface {
|
|
|
|
Add(...*Alert) error
|
2015-07-04 10:52:53 +00:00
|
|
|
Get(model.Fingerprint) (*Alert, error)
|
2015-07-01 16:36:37 +00:00
|
|
|
GetAll() ([]*Alert, error)
|
2015-07-02 16:38:05 +00:00
|
|
|
|
2015-07-04 12:59:52 +00:00
|
|
|
Iter() <-chan *Alert
|
2015-07-01 16:36:37 +00:00
|
|
|
}
|
2015-06-30 12:29:30 +00:00
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
type ConfigState interface {
|
|
|
|
Set(*Config) error
|
|
|
|
Get() (*Config, error)
|
|
|
|
}
|
2015-06-30 12:29:30 +00:00
|
|
|
|
2015-07-04 13:50:42 +00:00
|
|
|
type NotifyState interface {
|
2015-07-09 13:01:38 +00:00
|
|
|
Get(model.Fingerprint) (*NotifyInfo, error)
|
|
|
|
Set(model.Fingerprint, *NotifyInfo) error
|
2015-07-11 14:39:16 +00:00
|
|
|
List() ([]*NotifyInfo, error)
|
2015-07-04 13:50:42 +00:00
|
|
|
}
|
2015-06-30 12:29:30 +00:00
|
|
|
|
|
|
|
type SilenceState interface {
|
|
|
|
// Silences returns a list of all silences.
|
2015-07-09 13:01:38 +00:00
|
|
|
List() ([]*Silence, error)
|
2015-06-30 12:29:30 +00:00
|
|
|
|
|
|
|
// SetSilence sets the given silence.
|
2015-07-01 11:17:08 +00:00
|
|
|
Set(*Silence) error
|
|
|
|
Del(sid string) error
|
|
|
|
Get(sid string) (*Silence, error)
|
2015-06-30 12:29:30 +00:00
|
|
|
}
|
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
// simpleState implements the State interface based on in-memory storage.
|
|
|
|
type simpleState struct {
|
2015-07-01 11:17:08 +00:00
|
|
|
silences *memSilences
|
2015-07-07 07:47:09 +00:00
|
|
|
alerts *crdtAlerts
|
2015-07-02 16:38:05 +00:00
|
|
|
config *memConfig
|
2015-07-09 13:01:38 +00:00
|
|
|
notify *memNotify
|
2015-06-30 12:29:30 +00:00
|
|
|
}
|
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
func NewSimpleState() State {
|
2015-07-04 12:59:52 +00:00
|
|
|
state := &simpleState{
|
2015-07-01 11:17:08 +00:00
|
|
|
silences: &memSilences{
|
2015-07-04 10:52:53 +00:00
|
|
|
sils: map[string]*Silence{},
|
2015-07-01 11:17:08 +00:00
|
|
|
nextID: 1,
|
|
|
|
},
|
2015-07-07 07:47:09 +00:00
|
|
|
alerts: newCRDTAlerts(crdt.NewMemStorage()),
|
|
|
|
// alerts: &memAlerts{
|
|
|
|
// alerts: map[model.Fingerprint]*Alert{},
|
|
|
|
// updates: make(chan *Alert, 100),
|
|
|
|
// },
|
2015-07-02 16:38:05 +00:00
|
|
|
config: &memConfig{},
|
2015-07-09 13:01:38 +00:00
|
|
|
notify: &memNotify{
|
|
|
|
m: map[model.Fingerprint]*NotifyInfo{},
|
|
|
|
},
|
2015-06-30 12:29:30 +00:00
|
|
|
}
|
2015-07-04 12:59:52 +00:00
|
|
|
|
|
|
|
go state.alerts.run()
|
|
|
|
|
|
|
|
return state
|
2015-06-30 12:29:30 +00:00
|
|
|
}
|
|
|
|
|
2015-07-10 16:27:17 +00:00
|
|
|
func NewPersistentState(path string) State {
|
|
|
|
alertDB, err := crdt.NewLevelDBStorage(filepath.Join(path, "/alerts"))
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
state := &simpleState{
|
|
|
|
silences: &memSilences{
|
|
|
|
sils: map[string]*Silence{},
|
|
|
|
nextID: 1,
|
|
|
|
},
|
|
|
|
alerts: newCRDTAlerts(alertDB),
|
|
|
|
// alerts: &memAlerts{
|
|
|
|
// alerts: map[model.Fingerprint]*Alert{},
|
|
|
|
// updates: make(chan *Alert, 100),
|
|
|
|
// },
|
|
|
|
config: &memConfig{},
|
|
|
|
notify: &memNotify{
|
|
|
|
m: map[model.Fingerprint]*NotifyInfo{},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
go state.alerts.run()
|
|
|
|
|
|
|
|
return state
|
|
|
|
}
|
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
func (s *simpleState) Alert() AlertState {
|
2015-07-01 16:36:37 +00:00
|
|
|
return s.alerts
|
|
|
|
}
|
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
func (s *simpleState) Silence() SilenceState {
|
2015-07-01 11:17:08 +00:00
|
|
|
return s.silences
|
|
|
|
}
|
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
func (s *simpleState) Config() ConfigState {
|
|
|
|
return s.config
|
|
|
|
}
|
|
|
|
|
2015-07-09 13:01:38 +00:00
|
|
|
func (s *simpleState) Notify() NotifyState {
|
|
|
|
return s.notify
|
|
|
|
}
|
|
|
|
|
|
|
|
type NotifyInfo struct {
|
|
|
|
LastSent time.Time
|
|
|
|
LastResolved bool
|
2015-07-11 14:39:16 +00:00
|
|
|
Labels model.LabelSet
|
2015-07-09 13:01:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type memNotify struct {
|
|
|
|
m map[model.Fingerprint]*NotifyInfo
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *memNotify) Get(fp model.Fingerprint) (*NotifyInfo, error) {
|
|
|
|
if info, ok := s.m[fp]; ok {
|
|
|
|
return info, nil
|
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("notify info for %s not found", fp)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *memNotify) Set(fp model.Fingerprint, info *NotifyInfo) error {
|
|
|
|
s.m[fp] = info
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-07-11 14:39:16 +00:00
|
|
|
func (s *memNotify) List() ([]*NotifyInfo, error) {
|
|
|
|
var res []*NotifyInfo
|
|
|
|
for _, ni := range s.m {
|
|
|
|
res = append(res, ni)
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
type memConfig struct {
|
|
|
|
config *Config
|
|
|
|
mtx sync.RWMutex
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *memConfig) Set(conf *Config) error {
|
|
|
|
c.mtx.Lock()
|
|
|
|
defer c.mtx.Unlock()
|
|
|
|
|
|
|
|
c.config = conf
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *memConfig) Get() (*Config, error) {
|
|
|
|
c.mtx.RLock()
|
|
|
|
defer c.mtx.RUnlock()
|
|
|
|
|
|
|
|
return c.config, nil
|
|
|
|
}
|
|
|
|
|
2015-07-07 07:47:09 +00:00
|
|
|
type crdtAlerts struct {
|
|
|
|
set crdt.Set
|
|
|
|
|
|
|
|
updates chan *Alert
|
|
|
|
subs []chan *Alert
|
|
|
|
mtx sync.RWMutex
|
|
|
|
}
|
|
|
|
|
|
|
|
func newCRDTAlerts(storage crdt.Storage) *crdtAlerts {
|
|
|
|
return &crdtAlerts{
|
|
|
|
set: crdt.NewLWW(storage),
|
|
|
|
updates: make(chan *Alert, 100),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *crdtAlerts) run() {
|
|
|
|
for a := range s.updates {
|
|
|
|
s.mtx.RLock()
|
|
|
|
|
|
|
|
for _, sub := range s.subs {
|
|
|
|
select {
|
|
|
|
case <-time.After(100 * time.Millisecond):
|
|
|
|
log.Errorf("dropped alert %s for subscriber", a)
|
|
|
|
case sub <- a:
|
|
|
|
// Success
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mtx.RUnlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *crdtAlerts) Add(alerts ...*Alert) error {
|
|
|
|
for _, a := range alerts {
|
2015-07-10 16:27:17 +00:00
|
|
|
|
|
|
|
b, err := json.Marshal(a)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = s.set.Add(a.Fingerprint().String(), uint64(a.Timestamp.UnixNano()/1e6), b)
|
2015-07-07 07:47:09 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
s.updates <- a
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *crdtAlerts) Get(fp model.Fingerprint) (*Alert, error) {
|
|
|
|
e, err := s.set.Get(fp.String())
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-07-10 16:27:17 +00:00
|
|
|
var alert Alert
|
|
|
|
err = json.Unmarshal(e.Value, &alert)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-07-07 07:47:09 +00:00
|
|
|
|
2015-07-10 16:27:17 +00:00
|
|
|
return &alert, nil
|
2015-07-07 07:47:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *crdtAlerts) GetAll() ([]*Alert, error) {
|
|
|
|
list, err := s.set.List()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var alerts []*Alert
|
|
|
|
for _, e := range list {
|
2015-07-10 16:27:17 +00:00
|
|
|
var alert Alert
|
|
|
|
err = json.Unmarshal(e.Value, &alert)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
alerts = append(alerts, &alert)
|
2015-07-07 07:47:09 +00:00
|
|
|
}
|
|
|
|
return alerts, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *crdtAlerts) Iter() <-chan *Alert {
|
|
|
|
ch := make(chan *Alert, 100)
|
|
|
|
|
|
|
|
// As we append the channel to the subcription channels
|
|
|
|
// before sending the current list of all alerts, no alert is lost.
|
|
|
|
// Handling the some alert twice is effectively a noop.
|
|
|
|
s.mtx.Lock()
|
|
|
|
s.subs = append(s.subs, ch)
|
|
|
|
s.mtx.Unlock()
|
|
|
|
|
|
|
|
prev, err := s.GetAll()
|
|
|
|
if err != nil {
|
|
|
|
log.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
for _, alert := range prev {
|
|
|
|
ch <- alert
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2015-07-01 16:36:37 +00:00
|
|
|
type memAlerts struct {
|
2015-07-04 12:59:52 +00:00
|
|
|
alerts map[model.Fingerprint]*Alert
|
|
|
|
updates chan *Alert
|
|
|
|
subs []chan *Alert
|
|
|
|
mtx sync.RWMutex
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *memAlerts) run() {
|
|
|
|
for a := range s.updates {
|
|
|
|
s.mtx.RLock()
|
|
|
|
|
|
|
|
for _, sub := range s.subs {
|
|
|
|
select {
|
|
|
|
case <-time.After(100 * time.Millisecond):
|
|
|
|
log.Errorf("dropped alert %s for subscriber", a)
|
|
|
|
case sub <- a:
|
|
|
|
// Success
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mtx.RUnlock()
|
|
|
|
}
|
2015-07-01 16:36:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *memAlerts) GetAll() ([]*Alert, error) {
|
|
|
|
s.mtx.RLock()
|
|
|
|
defer s.mtx.RUnlock()
|
|
|
|
|
2015-07-04 11:02:49 +00:00
|
|
|
alerts := make([]*Alert, 0, len(s.alerts))
|
|
|
|
for _, a := range s.alerts {
|
|
|
|
alerts = append(alerts, a)
|
2015-07-04 10:52:53 +00:00
|
|
|
}
|
2015-07-01 16:36:37 +00:00
|
|
|
|
2015-07-04 11:02:49 +00:00
|
|
|
// TODO(fabxc): specify whether time sorting is an interface
|
|
|
|
// requirement.
|
|
|
|
sort.Sort(alertTimeline(alerts))
|
|
|
|
|
2015-07-01 16:36:37 +00:00
|
|
|
return alerts, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *memAlerts) Add(alerts ...*Alert) error {
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
|
2015-07-02 16:38:05 +00:00
|
|
|
for _, alert := range alerts {
|
2015-07-04 10:52:53 +00:00
|
|
|
fp := alert.Fingerprint()
|
|
|
|
|
|
|
|
// Last write wins.
|
2015-07-04 12:05:04 +00:00
|
|
|
if prev, ok := s.alerts[fp]; !ok || !prev.Timestamp.After(alert.Timestamp) {
|
2015-07-04 10:52:53 +00:00
|
|
|
s.alerts[fp] = alert
|
|
|
|
}
|
|
|
|
|
2015-07-04 12:59:52 +00:00
|
|
|
s.updates <- alert
|
2015-07-02 16:38:05 +00:00
|
|
|
}
|
2015-07-04 10:52:53 +00:00
|
|
|
|
2015-07-01 16:36:37 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-07-04 10:52:53 +00:00
|
|
|
func (s *memAlerts) Get(fp model.Fingerprint) (*Alert, error) {
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
|
|
|
|
if a, ok := s.alerts[fp]; ok {
|
|
|
|
return a, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, fmt.Errorf("alert with fingerprint %s does not exist", fp)
|
|
|
|
}
|
|
|
|
|
2015-07-04 13:50:42 +00:00
|
|
|
func (s *memAlerts) Del(fp model.Fingerprint) error {
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
|
|
|
|
delete(s.alerts, fp)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-07-04 12:59:52 +00:00
|
|
|
func (s *memAlerts) Iter() <-chan *Alert {
|
|
|
|
ch := make(chan *Alert, 100)
|
|
|
|
|
|
|
|
s.mtx.Lock()
|
|
|
|
s.subs = append(s.subs, ch)
|
|
|
|
s.mtx.Unlock()
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
prev, _ := s.GetAll()
|
|
|
|
|
|
|
|
for _, alert := range prev {
|
|
|
|
ch <- alert
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
return ch
|
2015-07-02 16:38:05 +00:00
|
|
|
}
|
|
|
|
|
2015-07-01 11:17:08 +00:00
|
|
|
type memSilences struct {
|
2015-07-04 10:52:53 +00:00
|
|
|
sils map[string]*Silence
|
2015-07-01 11:17:08 +00:00
|
|
|
|
2015-07-04 10:52:53 +00:00
|
|
|
mtx sync.RWMutex
|
2015-07-01 11:17:08 +00:00
|
|
|
nextID uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *memSilences) genID() string {
|
|
|
|
sid := fmt.Sprintf("%x", s.nextID)
|
|
|
|
s.nextID++
|
|
|
|
return sid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *memSilences) Get(sid string) (*Silence, error) {
|
2015-07-04 10:52:53 +00:00
|
|
|
s.mtx.RLock()
|
|
|
|
defer s.mtx.RUnlock()
|
|
|
|
|
|
|
|
if sil, ok := s.sils[sid]; ok {
|
|
|
|
return sil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, fmt.Errorf("silence with ID %s does not exist", sid)
|
2015-07-01 11:17:08 +00:00
|
|
|
}
|
2015-07-04 10:52:53 +00:00
|
|
|
|
2015-07-01 11:17:08 +00:00
|
|
|
func (s *memSilences) Del(sid string) error {
|
2015-07-04 10:52:53 +00:00
|
|
|
if _, ok := s.sils[sid]; !ok {
|
2015-07-01 11:17:08 +00:00
|
|
|
return fmt.Errorf("silence with ID %s does not exist", sid)
|
|
|
|
}
|
2015-07-04 10:52:53 +00:00
|
|
|
|
|
|
|
delete(s.sils, sid)
|
2015-07-01 11:17:08 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-07-09 13:01:38 +00:00
|
|
|
func (s *memSilences) List() ([]*Silence, error) {
|
2015-07-01 11:17:08 +00:00
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
|
2015-07-04 10:52:53 +00:00
|
|
|
sils := make([]*Silence, 0, len(s.sils))
|
|
|
|
for _, sil := range s.sils {
|
2015-06-30 12:29:30 +00:00
|
|
|
sils = append(sils, sil)
|
|
|
|
}
|
|
|
|
return sils, nil
|
|
|
|
}
|
|
|
|
|
2015-07-01 11:17:08 +00:00
|
|
|
func (s *memSilences) Set(sil *Silence) error {
|
|
|
|
s.mtx.RLock()
|
|
|
|
defer s.mtx.RUnlock()
|
|
|
|
|
|
|
|
if sil.ID == "" {
|
|
|
|
sil.ID = s.genID()
|
|
|
|
}
|
|
|
|
|
2015-07-04 10:52:53 +00:00
|
|
|
s.sils[sil.ID] = sil
|
2015-06-30 12:29:30 +00:00
|
|
|
return nil
|
|
|
|
}
|