feat: health and readiness check of prometheus server in CLI (promtool) (#12096)

* feat: health and readiness check of prometheus server in CLI (promtool)

Signed-off-by: nidhey27 <nidhey.indurkar@infracloud.io>
This commit is contained in:
Nidhey Nitin Indurkar 2023-04-04 02:02:39 +05:30 committed by GitHub
parent 4af28f8cf6
commit 3f7beeecc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 91 additions and 0 deletions

View File

@ -71,6 +71,8 @@ const (
lintOptionAll = "all"
lintOptionDuplicateRules = "duplicate-rules"
lintOptionNone = "none"
checkHealth = "/-/healthy"
checkReadiness = "/-/ready"
)
// lintOptions gathers the lintOption* constants into a single slice.
var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
@ -87,6 +89,7 @@ func main() {
app.HelpFlag.Short('h')
checkCmd := app.Command("check", "Check the resources for validity.")
checkCmd.Flag("http.config.file", "HTTP client configuration file for promtool to connect to Prometheus.").PlaceHolder("<filename>").ExistingFileVar(&httpConfigFilePath)
sdCheckCmd := checkCmd.Command("service-discovery", "Perform service discovery for the given job name and report the results, including relabeling.")
sdConfigFile := sdCheckCmd.Arg("config-file", "The prometheus config file.").Required().ExistingFile()
@ -113,6 +116,18 @@ func main() {
"The config files to check.",
).Required().ExistingFiles()
checkServerHealthCmd := checkCmd.Command("healthy", "Check if the Prometheus server is healthy.")
serverHealthURLArg := checkServerHealthCmd.Arg(
"server",
"The URL of the Prometheus server to check (e.g. http://localhost:9090)",
).URL()
checkServerReadyCmd := checkCmd.Command("ready", "Check if the Prometheus server is ready.")
serverReadyURLArg := checkServerReadyCmd.Arg(
"server",
"The URL of the Prometheus server to check (e.g. http://localhost:9090)",
).URL()
checkRulesCmd := checkCmd.Command("rules", "Check if the rule files are valid or not.")
ruleFiles := checkRulesCmd.Arg(
"rule-files",
@ -276,6 +291,12 @@ func main() {
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
case checkServerHealthCmd.FullCommand():
os.Exit(checkErr(CheckServerStatus(*serverHealthURLArg, checkHealth, httpRoundTripper)))
case checkServerReadyCmd.FullCommand():
os.Exit(checkErr(CheckServerStatus(*serverReadyURLArg, checkReadiness, httpRoundTripper)))
case checkWebConfigCmd.FullCommand():
os.Exit(CheckWebConfig(*webConfigFiles...))
@ -369,6 +390,45 @@ func (ls lintConfig) lintDuplicateRules() bool {
return ls.all || ls.duplicateRules
}
// promDefaultURL is the address CheckServerStatus falls back to when it is
// called with a nil server URL.
const promDefaultURL = "http://localhost:9090"

// CheckServerStatus sends a GET request for checkEndpoint (for example
// "/-/healthy" or "/-/ready") to the server at serverURL, passing
// roundTripper to the API client. A nil serverURL falls back to
// promDefaultURL. The request is bounded by a 10 second timeout.
//
// When the server answers 200 OK the response body is printed to stderr and
// nil is returned. Any other status code, or a failure to build or send the
// request, is returned as an error; reporting it is left to the caller.
func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
	if serverURL == nil {
		// promDefaultURL is a constant, well-formed URL, so the parse error
		// can safely be ignored.
		serverURL, _ = url.Parse(promDefaultURL)
	}

	// Drop trailing slashes so that "http://host:9090/" does not turn into
	// "http://host:9090//-/healthy" once the endpoint is appended.
	base := serverURL.String()
	for len(base) > 0 && base[len(base)-1] == '/' {
		base = base[:len(base)-1]
	}

	config := api.Config{
		Address:      base + checkEndpoint,
		RoundTripper: roundTripper,
	}

	// Create new client.
	c, err := api.NewClient(config)
	if err != nil {
		// Wrap instead of printing here: every other failure in this
		// function is only returned, so printing would report this one twice.
		return fmt.Errorf("error creating API client: %w", err)
	}

	request, err := http.NewRequest(http.MethodGet, config.Address, nil)
	if err != nil {
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	response, dataBytes, err := c.Do(ctx, request)
	if err != nil {
		return err
	}

	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("check failed: URL=%s, status=%d", serverURL, response.StatusCode)
	}

	fmt.Fprintln(os.Stderr, "  SUCCESS: ", string(dataBytes))
	return nil
}
// CheckConfig validates configuration files.
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ...string) int {
failed := false

View File

@ -58,6 +58,7 @@ Check the resources for validity.
| Flag | Description |
| --- | --- |
| <code class="text-nowrap">--http.config.file</code> | HTTP client configuration file for promtool to connect to Prometheus. |
| <code class="text-nowrap">--extended</code> | Print extended information related to the cardinality of the metrics. |
@ -130,6 +131,36 @@ Check if the web config files are valid or not.
##### `promtool check healthy`
Check if the Prometheus server is healthy.
###### Arguments
| Argument | Description |
| --- | --- |
| server | The URL of the Prometheus server to check (e.g. http://localhost:9090) |
##### `promtool check ready`
Check if the Prometheus server is ready.
###### Arguments
| Argument | Description |
| --- | --- |
| server | The URL of the Prometheus server to check (e.g. http://localhost:9090) |
##### `promtool check rules`
Check if the rule files are valid or not.