dispatch: don't reset timer if flush is in-progress (#1301)

When the aggregation group receives an alert that is past the initial
group_wait value, it should reset its timer only if the timer has ever
expired. Otherwise it means that the flush is already in-progress.
This commit is contained in:
Simon Pasquier 2018-03-29 12:22:49 +02:00 committed by stuart nelson
parent 19715022a4
commit 4cba49155d
2 changed files with 8 additions and 10 deletions

View File

@ -287,9 +287,9 @@ type aggrGroup struct {
next *time.Timer
timeout func(time.Duration) time.Duration
mtx sync.RWMutex
alerts map[model.Fingerprint]*types.Alert
hasSent bool
mtx sync.RWMutex
alerts map[model.Fingerprint]*types.Alert
hasFlushed bool
}
// newAggrGroup returns a new aggregation group.
@ -366,6 +366,7 @@ func (ag *aggrGroup) run(nf notifyFunc) {
// Wait the configured interval before calling flush again.
ag.mtx.Lock()
ag.next.Reset(ag.opts.GroupInterval)
ag.hasFlushed = true
ag.mtx.Unlock()
ag.flush(func(alerts ...*types.Alert) bool {
@ -396,7 +397,7 @@ func (ag *aggrGroup) insert(alert *types.Alert) {
// Immediately trigger a flush if the wait duration for this
// alert is already over.
if !ag.hasSent && alert.StartsAt.Add(ag.opts.GroupWait).Before(time.Now()) {
if !ag.hasFlushed && alert.StartsAt.Add(ag.opts.GroupWait).Before(time.Now()) {
ag.next.Reset(0)
}
}
@ -457,8 +458,6 @@ func (ag *aggrGroup) flush(notify func(...*types.Alert) bool) {
delete(ag.alerts, fp)
}
}
ag.hasSent = true
ag.mtx.Unlock()
}
}

View File

@ -406,10 +406,9 @@ receivers:
func TestReload(t *testing.T) {
t.Parallel()
// We create a notification config that fans out into two different
// webhooks.
// The succeeding one must still only receive the first successful
// notifications. Sending to the succeeding one must eventually succeed.
// This integration test ensures that the first alert isn't notified twice
// and repeat_interval applies after the AlertManager process has been
// reloaded.
conf := `
route:
receiver: "default"