Merge pull request #1325 from prometheus/notifyfix

Fix rule manager shutdown issues
This commit is contained in:
Fabian Reinartz 2016-01-20 13:23:35 +01:00
commit 0ecf8e98d1
3 changed files with 15 additions and 5 deletions

View File

@ -132,7 +132,8 @@ func Main() int {
}
}()
// Start all components.
// Start all components. The order is NOT arbitrary.
if err := memStorage.Start(); err != nil {
log.Errorln("Error opening memory series storage:", err)
return 1
@ -155,15 +156,19 @@ func Main() int {
prometheus.MustRegister(configSuccess)
prometheus.MustRegister(configSuccessTime)
go ruleManager.Run()
defer ruleManager.Stop()
// The notification handler is a dependency of the rule manager. It has to be
// started before the rule manager and torn down after it.
go notificationHandler.Run()
defer notificationHandler.Stop()
go ruleManager.Run()
defer ruleManager.Stop()
go targetManager.Run()
defer targetManager.Stop()
// Shutting down the query engine before the rule manager will cause pending queries
// to be canceled and ensures a quick shutdown of the rule manager.
defer queryEngine.Stop()
go webHandler.Run()

View File

@ -200,6 +200,7 @@ func (n *Handler) Run() {
}
// Send queues the given alerts for processing.
// Panics if called on a handler that is not running.
func (n *Handler) Send(alerts ...*model.Alert) {
n.mtx.Lock()
defer n.mtx.Unlock()

View File

@ -220,8 +220,12 @@ func (g *Group) eval() {
vector, err := rule.eval(now, g.opts.QueryEngine)
if err != nil {
// A canceled query is an intentional termination. This normally happens on
// shutdown, so we skip logging the error in that case.
if _, ok := err.(promql.ErrQueryCanceled); !ok {
log.Warnf("Error while evaluating rule %q: %s", rule, err)
}
evalFailures.Inc()
log.Warnf("Error while evaluating rule %q: %s", rule, err)
}
var rtyp ruleType