From 0f3c1bf6cffedab319804406dcfeed90e7fb9a75 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Mon, 12 Oct 2020 21:30:59 +0200 Subject: [PATCH] Report valid configs in the respective metrics from the beginning In #7399, an early validity check of the config was introduced to prevent the scenario where an invalid config is only detected after a possibly very long startup procedure. However, the respective success metrics are not updated after the initial validation so that the success metrics suggest an invalid config. If the startup procedure, like replaying the WAL, really takes very long, alerts about invalid config will trigger. This commit sets the success metrics after initial validation. They will be set again after the "real" config (re-)load, but that shouldn't be a problem. The metric now truthfully represents whenever the config was successfully loaded, no matter if the result was then thrown away (because it was just for validation) or actually used. Signed-off-by: beorn7 --- cmd/prometheus/main.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index fa6b6fa88..52b646d18 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -290,6 +290,14 @@ func main() { level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "err", err) os.Exit(2) } + // Now that the validity of the config is established, set the config + // success metrics accordingly, although the config isn't really loaded + // yet. This will happen later (including setting these metrics again), + // but if we don't do it now, the metrics will stay at zero until the + // startup procedure is complete, which might take long enough to + // trigger alerts about an invalid config. + configSuccess.Set(1) + configSuccessTime.SetToCurrentTime() cfg.web.ReadTimeout = time.Duration(cfg.webTimeout) // Default -web.route-prefix to path of -web.external-url.