diff --git a/notify/notify.go b/notify/notify.go index e454800b..f2d14b74 100644 --- a/notify/notify.go +++ b/notify/notify.go @@ -213,6 +213,29 @@ func Dedup(notifies provider.Notifies, n Notifier) *DedupingNotifier { return &DedupingNotifier{notifies: notifies, notifier: n} } +// hasUpdates checks an alert against the last notification that was made +// about it. +func (n *DedupingNotifier) hasUpdate(alert *types.Alert, last *types.NotifyInfo, now time.Time, interval time.Duration) bool { + if last != nil { + if alert.Resolved() { + if last.Resolved { + return false + } + } else if !last.Resolved { + // Do not send again if last was delivered unless + // the repeat interval has already passed. + if !now.After(last.Timestamp.Add(interval)) { + return false + } + } + } else if alert.Resolved() { + // If the alert is resolved but we never notified about it firing, + // there is nothing to do. + return false + } + return true +} + // Notify implements the Notifier interface. func (n *DedupingNotifier) Notify(ctx context.Context, alerts ...*types.Alert) error { name, ok := Receiver(ctx) @@ -235,57 +258,27 @@ func (n *DedupingNotifier) Notify(ctx context.Context, alerts ...*types.Alert) e fps = append(fps, a.Fingerprint()) } - notifies, err := n.notifies.Get(name, fps...) + notifyInfo, err := n.notifies.Get(name, fps...) if err != nil { return err } - var ( - doResend bool - resendQueue []*types.Alert - filtered []*types.Alert - ) - for i, a := range alerts { - last := notifies[i] - - if last != nil { - if a.Resolved() { - if last.Resolved { - continue - } - } else if !last.Resolved { - // Do not send again if last was delivered unless - // the repeat interval has already passed. - if !now.After(last.Timestamp.Add(repeatInterval)) { - // To not repeat initial batch fragmentation after the repeat interval - // has passed, store them and send them anyway if on of the other - // alerts has already passed the repeat interval. - // This way we unify batches again. - resendQueue = append(resendQueue, a) - - continue - } else { - doResend = true - } - } - } else if a.Resolved() { - // If the alert is resolved but we never notified about it firing, - // there is nothing to do. - continue + // If we have to notify about any of the alerts, we send a notification + // for the entire batch. + var send bool + for i, alert := range alerts { + if n.hasUpdate(alert, notifyInfo[i], now, repeatInterval) { + send = true + break } - - filtered = append(filtered, a) } - - // As we are resending an alert anyway, resend all of them even if their - // repeat interval has not yet passed. - if doResend { - filtered = append(filtered, resendQueue...) + if !send { + return nil } var newNotifies []*types.NotifyInfo - for _, a := range filtered { + for _, a := range alerts { newNotifies = append(newNotifies, &types.NotifyInfo{ Alert: a.Fingerprint(), Receiver: name, @@ -294,7 +287,7 @@ func (n *DedupingNotifier) Notify(ctx context.Context, alerts ...*types.Alert) e }) } - if err := n.notifier.Notify(ctx, filtered...); err != nil { + if err := n.notifier.Notify(ctx, alerts...); err != nil { return err } diff --git a/notify/notify_test.go b/notify/notify_test.go index 3404f4e1..971bc442 100644 --- a/notify/notify_test.go +++ b/notify/notify_test.go @@ -43,6 +43,129 @@ func (n *failNotifier) Notify(ctx context.Context, as ...*types.Alert) error { return fmt.Errorf("some error") } +func TestDedupingNotifierHasUpdate(t *testing.T) { + var ( + n = &DedupingNotifier{} + now = time.Now() + interval = 100 * time.Minute + ) + cases := []struct { + inAlert *types.Alert + inNotifyInfo *types.NotifyInfo + result bool + }{ + // A new alert about which there's no previous notification information. + { + inAlert: &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"alertname": "a"}, + StartsAt: now.Add(-10 * time.Minute), + }, + }, + inNotifyInfo: nil, + result: true, + }, + // A new alert about which there's no previous notification information. + // It is already resolved, so there's no use in sending a notification. + { + inAlert: &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"alertname": "a"}, + StartsAt: now.Add(-10 * time.Minute), + EndsAt: now, + }, + }, + inNotifyInfo: nil, + result: false, + }, + // An alert that has been firing is now resolved for the first time. + { + inAlert: &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"alertname": "a"}, + StartsAt: now.Add(-10 * time.Minute), + EndsAt: now, + }, + }, + inNotifyInfo: &types.NotifyInfo{ + Alert: model.LabelSet{"alertname": "a"}.Fingerprint(), + Resolved: false, + Timestamp: now.Add(-time.Minute), + }, + result: true, + }, + // A resolved alert for which we have already sent a resolved notification. + { + inAlert: &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"alertname": "a"}, + StartsAt: now.Add(-10 * time.Minute), + EndsAt: now, + }, + }, + inNotifyInfo: &types.NotifyInfo{ + Alert: model.LabelSet{"alertname": "a"}.Fingerprint(), + Resolved: true, + Timestamp: now.Add(-time.Minute), + }, + result: false, + }, + // An alert that was resolved last time but is now firing again. + { + inAlert: &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"alertname": "a"}, + StartsAt: now.Add(-3 * time.Minute), + }, + }, + inNotifyInfo: &types.NotifyInfo{ + Alert: model.LabelSet{"alertname": "a"}.Fingerprint(), + Resolved: true, + Timestamp: now.Add(-4 * time.Minute), + }, + result: true, + }, + // A firing alert about which we already notified. The last notification + // is less than the repeat interval ago. + { + inAlert: &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"alertname": "a"}, + StartsAt: now.Add(-10 * time.Minute), + }, + }, + inNotifyInfo: &types.NotifyInfo{ + Alert: model.LabelSet{"alertname": "a"}.Fingerprint(), + Resolved: false, + Timestamp: now.Add(-15 * time.Minute), + }, + result: false, + }, + // A firing alert about which we already notified. The last notification + // is more than the repeat interval ago. + { + inAlert: &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"alertname": "a"}, + StartsAt: now.Add(-10 * time.Minute), + }, + }, + inNotifyInfo: &types.NotifyInfo{ + Alert: model.LabelSet{"alertname": "a"}.Fingerprint(), + Resolved: false, + Timestamp: now.Add(-115 * time.Minute), + }, + result: true, + }, + } + + for i, c := range cases { + if n.hasUpdate(c.inAlert, c.inNotifyInfo, now, interval) != c.result { + t.Errorf("unexpected hasUpdates result for case %d", i) + } + } +} + func TestDedupingNotifier(t *testing.T) { var ( record = &recordNotifier{} @@ -66,69 +189,19 @@ func TestDedupingNotifier(t *testing.T) { Labels: model.LabelSet{"alertname": "1"}, EndsAt: now.Add(-5 * time.Minute), }, - }, { - Alert: model.Alert{ - Labels: model.LabelSet{"alertname": "2"}, - EndsAt: now.Add(-9 * time.Minute), - }, - }, { - Alert: model.Alert{ - Labels: model.LabelSet{"alertname": "3"}, - EndsAt: now.Add(-10 * time.Minute), - }, - }, { - Alert: model.Alert{ - Labels: model.LabelSet{"alertname": "4"}, - }, - }, { - Alert: model.Alert{ - Labels: model.LabelSet{"alertname": "5"}, - }, }, } - var fps []model.Fingerprint - for _, a := range alerts { - fps = append(fps, a.Fingerprint()) - } - + // Set an initial NotifyInfo to ensure that on notification failure + // nothing changes. nsBefore := []*types.NotifyInfo{ - // The first a new alert starting now. nil, - // The second alert was not previously notified about and - // is already resolved. - nil, - // The third alert is an attempt to resolve a previously - // firing alert. { - Alert: fps[2], + Alert: alerts[1].Fingerprint(), Receiver: "name", Resolved: false, Timestamp: now.Add(-10 * time.Minute), }, - // The fourth alert is an attempt to resolve an alert again - // even though the previous notification succeeded. - { - Alert: fps[3], - Receiver: "name", - Resolved: true, - Timestamp: now.Add(-10 * time.Minute), - }, - // The fifth alert resends a previously successful notification - // that was longer than ago than the repeat interval. - { - Alert: fps[4], - Receiver: "name", - Resolved: false, - Timestamp: now.Add(-110 * time.Minute), - }, - // The sixth alert is a firing again after being resolved before. - { - Alert: fps[5], - Receiver: "name", - Resolved: true, - Timestamp: now.Add(3 * time.Minute), - }, } if err := notifies.Set(nsBefore...); err != nil { @@ -140,7 +213,7 @@ func TestDedupingNotifier(t *testing.T) { t.Fatalf("Fail notifier did not fail") } // After a failing notify the notifies data must be unchanged. - nsCur, err := notifies.Get("name", fps...) + nsCur, err := notifies.Get("name", alerts[0].Fingerprint(), alerts[1].Fingerprint()) if err != nil { t.Fatalf("Error getting notify info: %s", err) } @@ -153,46 +226,29 @@ func TestDedupingNotifier(t *testing.T) { t.Fatalf("Notify failed: %s", err) } - alertsExp := []*types.Alert{ - alerts[0], - alerts[2], - alerts[4], - alerts[5], + if !reflect.DeepEqual(record.alerts, alerts) { + t.Fatalf("Expected alerts %v, got %v", alerts, record.alerts) + } + nsCur, err = notifies.Get("name", alerts[0].Fingerprint(), alerts[1].Fingerprint()) + if err != nil { + t.Fatalf("Error getting notifies: %s", err) } nsAfter := []*types.NotifyInfo{ { - Alert: fps[0], - Receiver: "name", - Resolved: false, - }, - nil, - { - Alert: fps[2], - Receiver: "name", - Resolved: true, - }, - nsBefore[3], - { - Alert: fps[4], - Receiver: "name", - Resolved: false, + Alert: alerts[0].Fingerprint(), + Receiver: "name", + Resolved: false, + Timestamp: now, }, { - Alert: fps[5], - Receiver: "name", - Resolved: false, + Alert: alerts[1].Fingerprint(), + Receiver: "name", + Resolved: true, + Timestamp: now, }, } - if !reflect.DeepEqual(record.alerts, alertsExp) { - t.Fatalf("Expected alerts %v, got %v", alertsExp, record.alerts) - } - nsCur, err = notifies.Get("name", fps...) - if err != nil { - t.Fatalf("Error getting notifies: %s", err) - } - for i, after := range nsAfter { cur := nsCur[i] diff --git a/test/acceptance/send_test.go b/test/acceptance/send_test.go index 3523c604..c27b3183 100644 --- a/test/acceptance/send_test.go +++ b/test/acceptance/send_test.go @@ -229,19 +229,22 @@ receivers: am := at.Alertmanager(fmt.Sprintf(conf, wh.Address())) am.Push(At(1.1), Alert("alertname", "test1").Active(1)) - am.Push(At(1.9), Alert("alertname", "test5").Active(1)) - am.Push(At(2.3), - Alert("alertname", "test2").Active(1.5), - Alert("alertname", "test3").Active(1.5), - Alert("alertname", "test4").Active(1.6), - ) + am.Push(At(1.7), Alert("alertname", "test5").Active(1)) co.Want(Between(2.0, 2.5), Alert("alertname", "test1").Active(1), Alert("alertname", "test5").Active(1), ) - // Only expect the new ones with the next group interval. - co.Want(Between(3, 3.5), + + am.Push(At(3.3), + Alert("alertname", "test2").Active(1.5), + Alert("alertname", "test3").Active(1.5), + Alert("alertname", "test4").Active(1.6), + ) + + co.Want(Between(4.1, 4.5), + Alert("alertname", "test1").Active(1), + Alert("alertname", "test5").Active(1), Alert("alertname", "test2").Active(1.5), Alert("alertname", "test3").Active(1.5), Alert("alertname", "test4").Active(1.6), @@ -250,10 +253,7 @@ receivers: // While no changes happen expect no additional notifications // until the 5s repeat interval has ended. - // The last three notifications should sent with the first two even - // though their repeat interval has not yet passed. This way fragmented - // batches are unified and notification noise reduced. - co.Want(Between(7, 7.5), + co.Want(Between(9.1, 9.5), Alert("alertname", "test1").Active(1), Alert("alertname", "test5").Active(1), Alert("alertname", "test2").Active(1.5),