Merge pull request #1325 from prometheus/notifyfix

Fix rule manager shutdown issues
This commit is contained in:
Fabian Reinartz 2016-01-20 13:23:35 +01:00
commit 0ecf8e98d1
3 changed files with 15 additions and 5 deletions

View File

@ -132,7 +132,8 @@ func Main() int {
} }
}() }()
// Start all components. // Start all components. The order is NOT arbitrary.
if err := memStorage.Start(); err != nil { if err := memStorage.Start(); err != nil {
log.Errorln("Error opening memory series storage:", err) log.Errorln("Error opening memory series storage:", err)
return 1 return 1
@ -155,15 +156,19 @@ func Main() int {
prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccess)
prometheus.MustRegister(configSuccessTime) prometheus.MustRegister(configSuccessTime)
go ruleManager.Run() // The notification handler is a dependency of the rule manager. It has to be
defer ruleManager.Stop() // started before and torn down afterwards.
go notificationHandler.Run() go notificationHandler.Run()
defer notificationHandler.Stop() defer notificationHandler.Stop()
go ruleManager.Run()
defer ruleManager.Stop()
go targetManager.Run() go targetManager.Run()
defer targetManager.Stop() defer targetManager.Stop()
// Shutting down the query engine before the rule manager causes pending queries
// to be canceled and ensures a quick shutdown of the rule manager.
defer queryEngine.Stop() defer queryEngine.Stop()
go webHandler.Run() go webHandler.Run()

View File

@ -200,6 +200,7 @@ func (n *Handler) Run() {
} }
// Send queues the given alerts for processing. // Send queues the given alerts for processing.
// Panics if called on a handler that is not running.
func (n *Handler) Send(alerts ...*model.Alert) { func (n *Handler) Send(alerts ...*model.Alert) {
n.mtx.Lock() n.mtx.Lock()
defer n.mtx.Unlock() defer n.mtx.Unlock()

View File

@ -220,8 +220,12 @@ func (g *Group) eval() {
vector, err := rule.eval(now, g.opts.QueryEngine) vector, err := rule.eval(now, g.opts.QueryEngine)
if err != nil { if err != nil {
// Canceled queries are intentional terminations of queries. This normally
// happens on shutdown and thus we skip logging of any errors here.
if _, ok := err.(promql.ErrQueryCanceled); !ok {
log.Warnf("Error while evaluating rule %q: %s", rule, err)
}
evalFailures.Inc() evalFailures.Inc()
log.Warnf("Error while evaluating rule %q: %s", rule, err)
} }
var rtyp ruleType var rtyp ruleType