From 4cba49155d91b155c878be123c8b2af27f762002 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Thu, 29 Mar 2018 12:22:49 +0200 Subject: [PATCH] dispatch: don't reset timer if flush is in-progress (#1301) When the aggregation group receives an alert that is past the initial group_wait value, it should reset its timer only if the timer has ever expired. Otherwise it means that the flush is already in-progress. --- dispatch/dispatch.go | 11 +++++------ test/acceptance/send_test.go | 7 +++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dispatch/dispatch.go b/dispatch/dispatch.go index 3926146d..d863de4b 100644 --- a/dispatch/dispatch.go +++ b/dispatch/dispatch.go @@ -287,9 +287,9 @@ type aggrGroup struct { next *time.Timer timeout func(time.Duration) time.Duration - mtx sync.RWMutex - alerts map[model.Fingerprint]*types.Alert - hasSent bool + mtx sync.RWMutex + alerts map[model.Fingerprint]*types.Alert + hasFlushed bool } // newAggrGroup returns a new aggregation group. @@ -366,6 +366,7 @@ func (ag *aggrGroup) run(nf notifyFunc) { // Wait the configured interval before calling flush again. ag.mtx.Lock() ag.next.Reset(ag.opts.GroupInterval) + ag.hasFlushed = true ag.mtx.Unlock() ag.flush(func(alerts ...*types.Alert) bool { @@ -396,7 +397,7 @@ func (ag *aggrGroup) insert(alert *types.Alert) { // Immediately trigger a flush if the wait duration for this // alert is already over. - if !ag.hasSent && alert.StartsAt.Add(ag.opts.GroupWait).Before(time.Now()) { + if !ag.hasFlushed && alert.StartsAt.Add(ag.opts.GroupWait).Before(time.Now()) { ag.next.Reset(0) } } @@ -457,8 +458,6 @@ func (ag *aggrGroup) flush(notify func(...*types.Alert) bool) { delete(ag.alerts, fp) } } - - ag.hasSent = true ag.mtx.Unlock() } } diff --git a/test/acceptance/send_test.go b/test/acceptance/send_test.go index e1fe8652..53142373 100644 --- a/test/acceptance/send_test.go +++ b/test/acceptance/send_test.go @@ -406,10 +406,9 @@ receivers: func TestReload(t *testing.T) { t.Parallel() - // We create a notification config that fans out into two different - // webhooks. - // The succeeding one must still only receive the first successful - // notifications. Sending to the succeeding one must eventually succeed. + // This integration test ensures that the first alert isn't notified twice + // and repeat_interval applies after the AlertManager process has been + // reloaded. conf := ` route: receiver: "default"