Notify only when new firing alerts are added (#1205)
After the initial notification has been sent, AlertManager shouldn't notify the receiver again when no new alerts have been added to the group during group_interval. This change also modifies the acceptance test framework to assert that no notification has been received in a given interval.
This commit is contained in:
parent
b45c11b561
commit
62b957cc14
|
@ -21,21 +21,6 @@ func (m *Entry) IsFiringSubset(subset map[uint64]struct{}) bool {
|
|||
set[m.FiringAlerts[i]] = struct{}{}
|
||||
}
|
||||
|
||||
return isSubset(set, subset)
|
||||
}
|
||||
|
||||
// IsResolvedSubset returns whether the given subset is a subset of the alerts
|
||||
// that were resolved at the time of the last notification.
|
||||
func (m *Entry) IsResolvedSubset(subset map[uint64]struct{}) bool {
|
||||
set := map[uint64]struct{}{}
|
||||
for i := range m.ResolvedAlerts {
|
||||
set[m.ResolvedAlerts[i]] = struct{}{}
|
||||
}
|
||||
|
||||
return isSubset(set, subset)
|
||||
}
|
||||
|
||||
func isSubset(set, subset map[uint64]struct{}) bool {
|
||||
for k := range subset {
|
||||
_, exists := set[k]
|
||||
if !exists {
|
||||
|
|
|
@ -32,34 +32,6 @@ func TestIsFiringSubset(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestIsResolvedSubset(t *testing.T) {
|
||||
e := &Entry{
|
||||
ResolvedAlerts: []uint64{1, 2, 3},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
subset map[uint64]struct{}
|
||||
expected bool
|
||||
}{
|
||||
{newSubset(), true}, //empty subset
|
||||
{newSubset(1), true},
|
||||
{newSubset(2), true},
|
||||
{newSubset(3), true},
|
||||
{newSubset(1, 2), true},
|
||||
{newSubset(1, 2), true},
|
||||
{newSubset(1, 2, 3), true},
|
||||
{newSubset(4), false},
|
||||
{newSubset(1, 5), false},
|
||||
{newSubset(1, 2, 3, 6), false},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
if result := e.IsResolvedSubset(test.subset); result != test.expected {
|
||||
t.Errorf("Expected %t, got %t for subset %v", test.expected, result, elements(test.subset))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newSubset(elements ...uint64) map[uint64]struct{} {
|
||||
subset := make(map[uint64]struct{})
|
||||
for _, el := range elements {
|
||||
|
|
|
@ -481,17 +481,13 @@ func (n *DedupStage) needsUpdate(entry *nflogpb.Entry, firing, resolved map[uint
|
|||
return true, nil
|
||||
}
|
||||
|
||||
// If the current alert group and last notification contain no firing alert
|
||||
// and the resolved alerts are different, it means that some alerts have
|
||||
// been fired and resolved during the last group_wait interval. In this
|
||||
// case, there is no need to notify the receiver since it doesn't know
|
||||
// Notify about all alerts being resolved. If the current alert group and
|
||||
// last notification contain no firing alert, it means that some alerts
|
||||
// have been fired and resolved during the last group_wait interval. In
|
||||
// this case, there is no need to notify the receiver since it doesn't know
|
||||
// about them.
|
||||
if len(firing) == 0 && len(entry.FiringAlerts) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if !entry.IsResolvedSubset(resolved) {
|
||||
return true, nil
|
||||
if len(firing) == 0 {
|
||||
return len(entry.FiringAlerts) > 0, nil
|
||||
}
|
||||
|
||||
// Nothing changed, only notify if the repeat interval has passed.
|
||||
|
|
|
@ -144,6 +144,36 @@ func TestDedupStageNeedsUpdate(t *testing.T) {
|
|||
repeat: 10 * time.Minute,
|
||||
resolvedAlerts: alertHashSet(3, 4, 5),
|
||||
res: false,
|
||||
}, {
|
||||
entry: &nflogpb.Entry{
|
||||
FiringAlerts: []uint64{1, 2},
|
||||
ResolvedAlerts: []uint64{3},
|
||||
Timestamp: now.Add(-11 * time.Minute),
|
||||
},
|
||||
repeat: 10 * time.Minute,
|
||||
firingAlerts: alertHashSet(1),
|
||||
resolvedAlerts: alertHashSet(2, 3),
|
||||
res: true,
|
||||
}, {
|
||||
entry: &nflogpb.Entry{
|
||||
FiringAlerts: []uint64{1, 2},
|
||||
ResolvedAlerts: []uint64{3},
|
||||
Timestamp: now.Add(-9 * time.Minute),
|
||||
},
|
||||
repeat: 10 * time.Minute,
|
||||
firingAlerts: alertHashSet(1),
|
||||
resolvedAlerts: alertHashSet(2, 3),
|
||||
res: false,
|
||||
}, {
|
||||
entry: &nflogpb.Entry{
|
||||
FiringAlerts: []uint64{1, 2},
|
||||
ResolvedAlerts: []uint64{3},
|
||||
Timestamp: now.Add(-9 * time.Minute),
|
||||
},
|
||||
repeat: 10 * time.Minute,
|
||||
firingAlerts: alertHashSet(),
|
||||
resolvedAlerts: alertHashSet(1, 2, 3),
|
||||
res: true,
|
||||
},
|
||||
}
|
||||
for i, c := range cases {
|
||||
|
|
|
@ -387,10 +387,8 @@ receivers:
|
|||
Alert("alertname", "test", "lbl", "v2").Active(1),
|
||||
Alert("alertname", "test", "lbl", "v3").Active(3),
|
||||
)
|
||||
co1.Want(Between(12, 12.5),
|
||||
Alert("alertname", "test", "lbl", "v2").Active(1, 11),
|
||||
Alert("alertname", "test", "lbl", "v3").Active(3),
|
||||
)
|
||||
// no notification should be sent after group_interval because no new alert has been fired
|
||||
co1.Want(Between(12, 12.5))
|
||||
|
||||
co2.Want(Between(2, 2.5),
|
||||
Alert("alertname", "test", "lbl", "v1").Active(1),
|
||||
|
@ -400,9 +398,7 @@ receivers:
|
|||
Alert("alertname", "test", "lbl", "v2").Active(1),
|
||||
Alert("alertname", "test", "lbl", "v3").Active(3),
|
||||
)
|
||||
co2.Want(Between(12, 12.5),
|
||||
Alert("alertname", "test", "lbl", "v3").Active(3),
|
||||
)
|
||||
co1.Want(Between(12, 12.5))
|
||||
|
||||
at.Run()
|
||||
}
|
||||
|
|
|
@ -92,8 +92,15 @@ func (c *Collector) check() string {
|
|||
for iv, expected := range c.expected {
|
||||
report += fmt.Sprintf("interval %v\n", iv)
|
||||
|
||||
var alerts []model.Alerts
|
||||
for at, got := range c.collected {
|
||||
if iv.contains(at) {
|
||||
alerts = append(alerts, got...)
|
||||
}
|
||||
}
|
||||
|
||||
for _, exp := range expected {
|
||||
var found model.Alerts
|
||||
found := len(exp) == 0 && len(alerts) == 0
|
||||
|
||||
report += fmt.Sprintf("---\n")
|
||||
|
||||
|
@ -101,22 +108,14 @@ func (c *Collector) check() string {
|
|||
report += fmt.Sprintf("- %v\n", c.opts.alertString(e))
|
||||
}
|
||||
|
||||
for at, got := range c.collected {
|
||||
if !iv.contains(at) {
|
||||
continue
|
||||
}
|
||||
for _, a := range got {
|
||||
for _, a := range alerts {
|
||||
if batchesEqual(exp, a, c.opts) {
|
||||
found = a
|
||||
break
|
||||
}
|
||||
}
|
||||
if found != nil {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if found != nil {
|
||||
if found {
|
||||
report += fmt.Sprintf(" [ ✓ ]\n")
|
||||
} else {
|
||||
c.t.Fail()
|
||||
|
|
Loading…
Reference in New Issue