mirror of
https://github.com/prometheus/prometheus
synced 2025-03-29 14:57:20 +00:00
feat: health and readiness check of prometheus server in CLI (promtool) (#12096)
* feat: health and readiness check of prometheus server in CLI (promtool) Signed-off-by: nidhey27 <nidhey.indurkar@infracloud.io>
This commit is contained in:
parent
4af28f8cf6
commit
3f7beeecc6
@ -71,6 +71,8 @@ const (
|
|||||||
lintOptionAll = "all"
|
lintOptionAll = "all"
|
||||||
lintOptionDuplicateRules = "duplicate-rules"
|
lintOptionDuplicateRules = "duplicate-rules"
|
||||||
lintOptionNone = "none"
|
lintOptionNone = "none"
|
||||||
|
checkHealth = "/-/healthy"
|
||||||
|
checkReadiness = "/-/ready"
|
||||||
)
|
)
|
||||||
|
|
||||||
var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
|
var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
|
||||||
@ -87,6 +89,7 @@ func main() {
|
|||||||
app.HelpFlag.Short('h')
|
app.HelpFlag.Short('h')
|
||||||
|
|
||||||
checkCmd := app.Command("check", "Check the resources for validity.")
|
checkCmd := app.Command("check", "Check the resources for validity.")
|
||||||
|
checkCmd.Flag("http.config.file", "HTTP client configuration file for promtool to connect to Prometheus.").PlaceHolder("<filename>").ExistingFileVar(&httpConfigFilePath)
|
||||||
|
|
||||||
sdCheckCmd := checkCmd.Command("service-discovery", "Perform service discovery for the given job name and report the results, including relabeling.")
|
sdCheckCmd := checkCmd.Command("service-discovery", "Perform service discovery for the given job name and report the results, including relabeling.")
|
||||||
sdConfigFile := sdCheckCmd.Arg("config-file", "The prometheus config file.").Required().ExistingFile()
|
sdConfigFile := sdCheckCmd.Arg("config-file", "The prometheus config file.").Required().ExistingFile()
|
||||||
@ -113,6 +116,18 @@ func main() {
|
|||||||
"The config files to check.",
|
"The config files to check.",
|
||||||
).Required().ExistingFiles()
|
).Required().ExistingFiles()
|
||||||
|
|
||||||
|
checkServerHealthCmd := checkCmd.Command("healthy", "Check if the Prometheus server is healthy.")
|
||||||
|
serverHealthURLArg := checkServerHealthCmd.Arg(
|
||||||
|
"server",
|
||||||
|
"The URL of the Prometheus server to check (e.g. http://localhost:9090)",
|
||||||
|
).URL()
|
||||||
|
|
||||||
|
checkServerReadyCmd := checkCmd.Command("ready", "Check if the Prometheus server is ready.")
|
||||||
|
serverReadyURLArg := checkServerReadyCmd.Arg(
|
||||||
|
"server",
|
||||||
|
"The URL of the Prometheus server to check (e.g. http://localhost:9090)",
|
||||||
|
).URL()
|
||||||
|
|
||||||
checkRulesCmd := checkCmd.Command("rules", "Check if the rule files are valid or not.")
|
checkRulesCmd := checkCmd.Command("rules", "Check if the rule files are valid or not.")
|
||||||
ruleFiles := checkRulesCmd.Arg(
|
ruleFiles := checkRulesCmd.Arg(
|
||||||
"rule-files",
|
"rule-files",
|
||||||
@ -276,6 +291,12 @@ func main() {
|
|||||||
case checkConfigCmd.FullCommand():
|
case checkConfigCmd.FullCommand():
|
||||||
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
|
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
|
||||||
|
|
||||||
|
case checkServerHealthCmd.FullCommand():
|
||||||
|
os.Exit(checkErr(CheckServerStatus(*serverHealthURLArg, checkHealth, httpRoundTripper)))
|
||||||
|
|
||||||
|
case checkServerReadyCmd.FullCommand():
|
||||||
|
os.Exit(checkErr(CheckServerStatus(*serverReadyURLArg, checkReadiness, httpRoundTripper)))
|
||||||
|
|
||||||
case checkWebConfigCmd.FullCommand():
|
case checkWebConfigCmd.FullCommand():
|
||||||
os.Exit(CheckWebConfig(*webConfigFiles...))
|
os.Exit(CheckWebConfig(*webConfigFiles...))
|
||||||
|
|
||||||
@ -369,6 +390,45 @@ func (ls lintConfig) lintDuplicateRules() bool {
|
|||||||
return ls.all || ls.duplicateRules
|
return ls.all || ls.duplicateRules
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// promDefaultURL is the server address CheckServerStatus falls back to when it
// is called with a nil URL.
const promDefaultURL = "http://localhost:9090"
|
||||||
|
|
||||||
|
// Check server status - healthy & ready.
|
||||||
|
func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
|
||||||
|
if serverURL == nil {
|
||||||
|
serverURL, _ = url.Parse(promDefaultURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
config := api.Config{
|
||||||
|
Address: serverURL.String() + checkEndpoint,
|
||||||
|
RoundTripper: roundTripper,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create new client.
|
||||||
|
c, err := api.NewClient(config)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, "error creating API client:", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
request, err := http.NewRequest("GET", config.Address, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
response, dataBytes, err := c.Do(ctx, request)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if response.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("check failed: URL=%s, status=%d", serverURL, response.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintln(os.Stderr, " SUCCESS: ", string(dataBytes))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// CheckConfig validates configuration files.
|
// CheckConfig validates configuration files.
|
||||||
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ...string) int {
|
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ...string) int {
|
||||||
failed := false
|
failed := false
|
||||||
|
@ -58,6 +58,7 @@ Check the resources for validity.
|
|||||||
|
|
||||||
| Flag | Description |
|
| Flag | Description |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
|
| <code class="text-nowrap">--http.config.file</code> | HTTP client configuration file for promtool to connect to Prometheus. |
|
||||||
| <code class="text-nowrap">--extended</code> | Print extended information related to the cardinality of the metrics. |
|
| <code class="text-nowrap">--extended</code> | Print extended information related to the cardinality of the metrics. |
|
||||||
|
|
||||||
|
|
||||||
@ -130,6 +131,36 @@ Check if the web config files are valid or not.
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##### `promtool check healthy`
|
||||||
|
|
||||||
|
Check if the Prometheus server is healthy.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###### Arguments
|
||||||
|
|
||||||
|
| Argument | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| server | The URL of the Prometheus server to check (e.g. http://localhost:9090) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##### `promtool check ready`
|
||||||
|
|
||||||
|
Check if the Prometheus server is ready.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###### Arguments
|
||||||
|
|
||||||
|
| Argument | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| server | The URL of the Prometheus server to check (e.g. http://localhost:9090) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
##### `promtool check rules`
|
##### `promtool check rules`
|
||||||
|
|
||||||
Check if the rule files are valid or not.
|
Check if the rule files are valid or not.
|
||||||
|
Loading…
Reference in New Issue
Block a user