From 72ef6e04e1ebe1ef54ff18fabd99e360531597f2 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 30 Apr 2021 10:11:10 +0200 Subject: [PATCH] Fix race condition causing 1st alert to not be immediately delivered when group_wait is 0s Signed-off-by: Marco Pracucci --- dispatch/dispatch.go | 46 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/dispatch/dispatch.go b/dispatch/dispatch.go index d62e0b8d..a3357857 100644 --- a/dispatch/dispatch.go +++ b/dispatch/dispatch.go @@ -290,30 +290,36 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *Route) { d.aggrGroups[route] = group } - // If the group does not exist, create it. ag, ok := group[fp] - if !ok { - ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger) - group[fp] = ag - d.metrics.aggrGroups.Inc() - - go ag.run(func(ctx context.Context, alerts ...*types.Alert) bool { - _, _, err := d.stage.Exec(ctx, d.logger, alerts...) - if err != nil { - lvl := level.Error(d.logger) - if ctx.Err() == context.Canceled { - // It is expected for the context to be canceled on - // configuration reload or shutdown. In this case, the - // message should only be logged at the debug level. - lvl = level.Debug(d.logger) - } - lvl.Log("msg", "Notify for alerts failed", "num_alerts", len(alerts), "err", err) - } - return err == nil - }) + if ok { + ag.insert(alert) + return } + // If the group does not exist, create it. + ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger) + group[fp] = ag + d.metrics.aggrGroups.Inc() + + // Insert the 1st alert in the group before starting the group's run() + // function, to make sure that the 1st alert is already there when + // run() is executed. ag.insert(alert) + + go ag.run(func(ctx context.Context, alerts ...*types.Alert) bool { + _, _, err := d.stage.Exec(ctx, d.logger, alerts...) 
+ if err != nil { + lvl := level.Error(d.logger) + if ctx.Err() == context.Canceled { + // It is expected for the context to be canceled on + // configuration reload or shutdown. In this case, the + // message should only be logged at the debug level. + lvl = level.Debug(d.logger) + } + lvl.Log("msg", "Notify for alerts failed", "num_alerts", len(alerts), "err", err) + } + return err == nil + }) } func getGroupLabels(alert *types.Alert, route *Route) model.LabelSet {