MINOR: promex: backend aggregated server check status

- add new metric: `haproxy_backend_agg_server_check_status`
  it counts the number of servers matching a specific check status
  this makes it possible to exclude the per-server check status, as users
  often rely only on the total. Indeed, in large setups with thousands of
  servers per backend, the memory cost of storing the per-server metric
  is not negligible.
- realign promex_st_metrics array

quite simple implementation - we could improve it later by adding an
internal state to the prometheus exporter, to avoid recounting at
every dump.

this patch is an attempt to close GitHub issue #1312. It may be backported
to 2.4 if requested.

Signed-off-by: William Dauchy <wdauchy@gmail.com>
This commit is contained in:
William Dauchy 2021-11-07 10:18:47 +01:00 committed by Christopher Faulet
parent db8a1f391d
commit 42d7c402d5
3 changed files with 131 additions and 103 deletions

View File

@ -289,6 +289,7 @@ const struct promex_metric promex_st_metrics[ST_F_TOTAL_FIELDS] = {
[ST_F_USED_CONN_CUR] = { .n = IST("used_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) },
[ST_F_NEED_CONN_EST] = { .n = IST("need_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) },
[ST_F_UWEIGHT] = { .n = IST("uweight"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) },
[ST_F_AGG_SRV_CHECK_STATUS] = { .n = IST("agg_server_check_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) },
};
/* Description of overridden stats fields */
@ -782,6 +783,7 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx)
{
static struct ist prefix = IST("haproxy_backend_");
struct proxy *px;
struct server *sv;
struct field val;
struct channel *chn = si_ic(appctx->owner);
struct ist out = ist2(trash.area, 0);
@ -789,7 +791,8 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx)
struct field *stats = stat_l[STATS_DOMAIN_PROXY];
int ret = 1;
double secs;
enum promex_back_state state;
enum promex_back_state bkd_state;
enum promex_srv_state srv_state;
for (;appctx->st2 < ST_F_TOTAL_FIELDS; appctx->st2++) {
if (!(promex_st_metrics[appctx->st2].flags & appctx->ctx.stats.flags))
@ -797,6 +800,7 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx)
while (appctx->ctx.stats.obj1) {
struct promex_label labels[PROMEX_MAX_LABELS-1] = {};
unsigned int srv_state_count[PROMEX_SRV_STATE_COUNT] = { 0 };
px = appctx->ctx.stats.obj1;
@ -811,12 +815,31 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx)
return -1;
switch (appctx->st2) {
case ST_F_AGG_SRV_CHECK_STATUS:
if (!px->srv)
goto next_px;
sv = px->srv;
while (sv) {
srv_state = promex_srv_status(sv);
srv_state_count[srv_state] += 1;
sv = sv->next;
}
for (; appctx->ctx.stats.st_code < PROMEX_SRV_STATE_COUNT; appctx->ctx.stats.st_code++) {
val = mkf_u32(FN_GAUGE, srv_state_count[appctx->ctx.stats.st_code]);
labels[1].name = ist("state");
labels[1].value = promex_srv_st[appctx->ctx.stats.st_code];
if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[appctx->st2],
&val, labels, &out, max))
goto full;
}
appctx->ctx.stats.st_code = 0;
goto next_px;
case ST_F_STATUS:
state = ((px->lbprm.tot_weight > 0 || !px->srv) ? 1 : 0);
bkd_state = ((px->lbprm.tot_weight > 0 || !px->srv) ? 1 : 0);
for (; appctx->ctx.stats.st_code < PROMEX_BACK_STATE_COUNT; appctx->ctx.stats.st_code++) {
labels[1].name = ist("state");
labels[1].value = promex_back_st[appctx->ctx.stats.st_code];
val = mkf_u32(FO_STATUS, state == appctx->ctx.stats.st_code);
val = mkf_u32(FO_STATUS, bkd_state == appctx->ctx.stats.st_code);
if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[appctx->st2],
&val, labels, &out, max))
goto full;

View File

@ -443,6 +443,7 @@ enum stat_field {
ST_F_USED_CONN_CUR,
ST_F_NEED_CONN_EST,
ST_F_UWEIGHT,
ST_F_AGG_SRV_CHECK_STATUS,
/* must always be the last one */
ST_F_TOTAL_FIELDS

View File

@ -259,6 +259,7 @@ const struct name_desc stat_fields[ST_F_TOTAL_FIELDS] = {
[ST_F_USED_CONN_CUR] = { .name = "used_conn_cur", .desc = "Current number of connections in use"},
[ST_F_NEED_CONN_EST] = { .name = "need_conn_est", .desc = "Estimated needed number of connections"},
[ST_F_UWEIGHT] = { .name = "uweight", .desc = "Server's user weight, or sum of active servers' user weights for a backend" },
[ST_F_AGG_SRV_CHECK_STATUS] = { .name = "agg_server_check_status", .desc = "Backend's aggregated gauge of servers' state check status" },
};
/* one line of info */
@ -2658,6 +2659,9 @@ int stats_fill_be_stats(struct proxy *px, int flags, struct field *stats, int le
chunk_appendf(out, " (%d/%d)", nbup, nbsrv);
metric = mkf_str(FO_STATUS, fld);
break;
case ST_F_AGG_SRV_CHECK_STATUS:
metric = mkf_u32(FN_GAUGE, 0);
break;
case ST_F_WEIGHT:
metric = mkf_u32(FN_AVG, (px->lbprm.tot_weight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv);
break;