diff --git a/contrib/prometheus-exporter/README b/contrib/prometheus-exporter/README index 5b163d1312..f948ac9b1a 100644 --- a/contrib/prometheus-exporter/README +++ b/contrib/prometheus-exporter/README @@ -73,7 +73,9 @@ exported. Here are examples: /metrics?scope=*&scope= # ==> no metrics will be exported /metrics?scope=&scope=global # ==> global metrics will be exported -* Filtering on servers state +* How do I prevent my Prometheus instance from exploding? + +** Filtering on servers state It is possible to exclude from returned metrics all servers in maintenance mode passing the parameter "no-maint" in the query-string. This parameter may help to @@ -82,6 +84,26 @@ manage dynamic provisionning. Note there is no consistency check on the servers state. So, if the state of a server changes while the exporter is running, only a part of the metrics for this server will be dumped. +Prometheus example config: + +For server-template users: +- + params: + no-maint: + - empty + +** Scrape server health checks only + +All health check statuses are dumped through `state` label values. If you want +to scrape server health check status but prevent all server metrics from being +saved, except the server_check_status, you may configure Prometheus this way: + +- + metric_relabel_configs: + - source_labels: ['__name__'] + regex: 'haproxy_(process_|frontend_|backend_|server_check_status).*' + action: keep + Exported metrics ------------------ @@ -292,30 +314,3 @@ Exported metrics | haproxy_server_used_connections_current | Current number of connections in use. | | haproxy_server_need_connections_current | Estimated needed number of connections. 
| +----------------------------------------------------+---------------------------------------------------------------------------+ - -Mapping of health check status : - - 0 : HCHK_STATUS_UNKNOWN (Unknown) - 1 : HCHK_STATUS_INI (Initializing) - - 4 : HCHK_STATUS_HANA (Health analyze detected enough consecutive errors) - - 5 : HCHK_STATUS_SOCKERR (Socket error) - - 6 : HCHK_STATUS_L4OK (L4 check passed, for example tcp connect) - 7 : HCHK_STATUS_L4TOUT (L4 timeout) - 8 : HCHK_STATUS_L4CON (L4 connection problem) - - 9 : HCHK_STATUS_L6OK (L6 check passed) - 10 : HCHK_STATUS_L6TOUT (L6 (SSL) timeout) - 11 : HCHK_STATUS_L6RSP (L6 invalid response - protocol error) - - 12 : HCHK_STATUS_L7TOUT (L7 (HTTP/SMTP) timeout) - 13 : HCHK_STATUS_L7RSP (L7 invalid response - protocol error) - 15 : HCHK_STATUS_L7OKD (L7 check passed) - 16 : HCHK_STATUS_L7OKCD (L7 check conditionally passed) - 17 : HCHK_STATUS_L7STS (L7 response error, for example HTTP 5xx) - - 18 : HCHK_STATUS_PROCERR (External process check failure) - 19 : HCHK_STATUS_PROCTOUT (External process check timeout) - 20 : HCHK_STATUS_PROCOK (External process check passed) diff --git a/contrib/prometheus-exporter/service-prometheus.c b/contrib/prometheus-exporter/service-prometheus.c index dbf4c7f396..df9c7cfdf2 100644 --- a/contrib/prometheus-exporter/service-prometheus.c +++ b/contrib/prometheus-exporter/service-prometheus.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -319,7 +320,7 @@ const struct ist promex_st_metric_desc[ST_F_TOTAL_FIELDS] = { [ST_F_RATE] = IST("Current number of sessions per second over last elapsed second."), [ST_F_RATE_LIM] = IST("Configured limit on new sessions per second."), [ST_F_RATE_MAX] = IST("Maximum observed number of sessions per second."), - [ST_F_CHECK_STATUS] = IST("Status of last health check (HCHK_STATUS_* values)."), + [ST_F_CHECK_STATUS] = IST("Status of last health check, per state label value."), [ST_F_CHECK_CODE] = IST("layer5-7 code, 
if available of the last health check."), [ST_F_CHECK_DURATION] = IST("Total duration of the latest server health check, in seconds."), [ST_F_HRSP_1XX] = IST("Total number of HTTP responses."), @@ -886,6 +887,7 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) int ret = 1; double secs; enum promex_srv_state state; + const char *check_state; int i; for (;appctx->st2 < ST_F_TOTAL_FIELDS; appctx->st2++) { @@ -963,8 +965,19 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) case ST_F_CHECK_STATUS: if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) != CHK_ST_ENABLED) goto next_sv; - val = mkf_u32(FN_OUTPUT, sv->check.status); - break; + + for (i = 0; i < HCHK_STATUS_SIZE; i++) { + if (get_check_status_result(i) < CHK_RES_FAILED) + continue; + val = mkf_u32(FO_STATUS, sv->check.status == i); + check_state = get_check_status_info(i); + labels[2].name = ist("state"); + labels[2].value = ist2(check_state, strlen(check_state)); + if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[appctx->st2], + &val, labels, &out, max)) + goto full; + } + goto next_sv; case ST_F_CHECK_CODE: if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) != CHK_ST_ENABLED) goto next_sv;