Reorder startup and shutdown to prevent panics. (#4321)

Start rule manager only after tsdb and config is loaded.
Stop rule manager before tsdb to avoid writing to closed storage.
Wait for any in-progress reloads to complete before shutting
down rule manager, so that rule manager doesn't get updated after
being shut down.

Remove incorrect comment around shutting down query enginge.
Log when config reload is completed.

Fixes #4133
Fixes #4262

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
This commit is contained in:
Brian Brazil 2018-07-04 13:41:16 +01:00 committed by GitHub
parent 9f2f6accba
commit 68e8b80ffe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 21 additions and 21 deletions

View File

@ -472,7 +472,9 @@ func main() {
},
func(err error) {
close(cancel)
// Wait for any in-progress reloads to complete to avoid
// reloading things after they have been shutdown.
cancel <- struct{}{}
},
)
}
@ -506,6 +508,23 @@ func main() {
},
)
}
{
// Rule manager.
// TODO(krasi) refactor ruleManager.Run() to be blocking to avoid using an extra blocking channel.
cancel := make(chan struct{})
g.Add(
func() error {
<-reloadReady.C
ruleManager.Run()
<-cancel
return nil
},
func(err error) {
ruleManager.Stop()
close(cancel)
},
)
}
{
// TSDB.
cancel := make(chan struct{})
@ -547,30 +566,10 @@ func main() {
return nil
},
func(err error) {
// Keep this interrupt before the ruleManager.Stop().
// Shutting down the query engine before the rule manager will cause pending queries
// to be canceled and ensures a quick shutdown of the rule manager.
cancelWeb()
},
)
}
{
// Rule manager.
// TODO(krasi) refactor ruleManager.Run() to be blocking to avoid using an extra blocking channel.
cancel := make(chan struct{})
g.Add(
func() error {
ruleManager.Run()
<-cancel
return nil
},
func(err error) {
ruleManager.Stop()
close(cancel)
},
)
}
{
// Notifier.
@ -627,6 +626,7 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config
if failed {
return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%s)", filename)
}
level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename)
return nil
}