From 58c32978b2877f1f3f73cd37eaefc0fe4458039c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 25 Nov 2013 10:46:38 +0900 Subject: [PATCH] MEDIUM: Set rise and fall of agent checks to 1 This is achieved by moving rise and fall from struct server to struct check. After this move the behaviour of the primary check, server->check, is unchanged. However, the secondary agent check, server->agent, now has independent rise and fall values, each of which is set to 1. The result is that receiving "fail", "stopped" or "down" just once from the agent will mark the server as down. And receiving a weight just once will allow the server to be marked up if its primary check is in good health. This opens up the scope to allow the rise and fall values of the agent check to be configurable; however, this has not been implemented at this stage. Signed-off-by: Simon Horman --- include/common/defaults.h | 2 ++ include/types/server.h | 6 +++--- src/cfgparse.c | 28 +++++++++++++++------------- src/checks.c | 36 ++++++++++++++++++------------------ src/dumpstats.c | 14 +++++++------- src/proto_http.c | 2 +- src/server.c | 2 +- 7 files changed, 47 insertions(+), 43 deletions(-) diff --git a/include/common/defaults.h b/include/common/defaults.h index 30ab148f5..13fb1e320 100644 --- a/include/common/defaults.h +++ b/include/common/defaults.h @@ -127,6 +127,8 @@ #define DEF_CHKINTR 2000 #define DEF_FALLTIME 3 #define DEF_RISETIME 2 +#define DEF_AGENT_FALLTIME 1 +#define DEF_AGENT_RISETIME 1 #define DEF_CHECK_REQ "OPTIONS / HTTP/1.0\r\n" #define DEF_SMTP_CHECK_REQ "HELO localhost\r\n" #define DEF_LDAP_CHECK_REQ "\x30\x0c\x02\x01\x01\x60\x07\x02\x01\x03\x04\x00\x80\x00" diff --git a/include/types/server.h b/include/types/server.h index 73d426d10..51f70de3f 100644 --- a/include/types/server.h +++ b/include/types/server.h @@ -123,8 +123,9 @@ struct check { int inter, fastinter, downinter; /* checks: time in milliseconds */ int result; /* health-check result : SRV_CHK_* */ int state; /*
health-check result : CHK_* */ - int health; /* 0 to server->rise-1 = bad; - * rise to server->rise+server->fall-1 = good */ + int health; /* 0 to rise-1 = bad; + * rise to rise+fall-1 = good */ + int rise, fall; /* time in iterations */ int type; /* Check type, one of PR_O2_*_CHK */ struct server *server; /* back-pointer to server */ }; @@ -157,7 +158,6 @@ struct server { struct server *tracknext, *track; /* next server in a tracking list, tracked server */ char *trackit; /* temporary variable to make assignment deferrable */ int consecutive_errors; /* current number of consecutive errors */ - int rise, fall; /* time in iterations */ int consecutive_errors_limit; /* number of consecutive errors that triggers an event */ short observe, onerror; /* observing mode: one of HANA_OBS_*; what to do on error: on of ANA_ONERR_* */ short onmarkeddown; /* what to do when marked down: one of HANA_ONMARKEDDOWN_* */ diff --git a/src/cfgparse.c b/src/cfgparse.c index 7df7de0da..8c289f1e4 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -1328,8 +1328,10 @@ void init_default_instance() defproxy.defsrv.agent.inter = DEF_CHKINTR; defproxy.defsrv.agent.fastinter = 0; defproxy.defsrv.agent.downinter = 0; - defproxy.defsrv.rise = DEF_RISETIME; - defproxy.defsrv.fall = DEF_FALLTIME; + defproxy.defsrv.check.rise = DEF_RISETIME; + defproxy.defsrv.check.fall = DEF_FALLTIME; + defproxy.defsrv.agent.rise = DEF_AGENT_RISETIME; + defproxy.defsrv.agent.fall = DEF_AGENT_FALLTIME; defproxy.defsrv.check.port = 0; defproxy.defsrv.agent.port = 0; defproxy.defsrv.maxqueue = 0; @@ -4287,8 +4289,6 @@ stats_error_parsing: newsrv->agent.inter = curproxy->defsrv.agent.inter; newsrv->agent.fastinter = curproxy->defsrv.agent.fastinter; newsrv->agent.downinter = curproxy->defsrv.agent.downinter; - newsrv->rise = curproxy->defsrv.rise; - newsrv->fall = curproxy->defsrv.fall; newsrv->maxqueue = curproxy->defsrv.maxqueue; newsrv->minconn = curproxy->defsrv.minconn; newsrv->maxconn = curproxy->defsrv.maxconn; 
@@ -4303,11 +4303,15 @@ stats_error_parsing: = curproxy->defsrv.iweight; newsrv->check.status = HCHK_STATUS_INI; - newsrv->check.health = newsrv->rise; /* up, but will fall down at first failure */ + newsrv->check.rise = curproxy->defsrv.check.rise; + newsrv->check.fall = curproxy->defsrv.check.fall; + newsrv->check.health = newsrv->check.rise; /* up, but will fall down at first failure */ newsrv->check.server = newsrv; newsrv->agent.status = HCHK_STATUS_INI; - newsrv->agent.health = newsrv->rise; /* up, but will fall down at first failure */ + newsrv->agent.rise = curproxy->defsrv.agent.rise; + newsrv->agent.fall = curproxy->defsrv.agent.fall; + newsrv->agent.health = newsrv->agent.rise; /* up, but will fall down at first failure */ newsrv->agent.server = newsrv; cur_arg = 3; @@ -4361,8 +4365,8 @@ stats_error_parsing: goto out; } - newsrv->rise = atol(args[cur_arg + 1]); - if (newsrv->rise <= 0) { + newsrv->check.rise = atol(args[cur_arg + 1]); + if (newsrv->check.rise <= 0) { Alert("parsing [%s:%d]: '%s' has to be > 0.\n", file, linenum, args[cur_arg]); err_code |= ERR_ALERT | ERR_FATAL; @@ -4370,13 +4374,11 @@ stats_error_parsing: } if (newsrv->check.health) - newsrv->check.health = newsrv->rise; - if (newsrv->agent.health) - newsrv->agent.health = newsrv->rise; + newsrv->check.health = newsrv->check.rise; cur_arg += 2; } else if (!strcmp(args[cur_arg], "fall")) { - newsrv->fall = atol(args[cur_arg + 1]); + newsrv->check.fall = atol(args[cur_arg + 1]); if (!*args[cur_arg + 1]) { Alert("parsing [%s:%d]: '%s' expects an integer argument.\n", @@ -4385,7 +4387,7 @@ stats_error_parsing: goto out; } - if (newsrv->fall <= 0) { + if (newsrv->check.fall <= 0) { Alert("parsing [%s:%d]: '%s' has to be > 0.\n", file, linenum, args[cur_arg]); err_code |= ERR_ALERT | ERR_FATAL; diff --git a/src/checks.c b/src/checks.c index 4ab29e501..2113c8c36 100644 --- a/src/checks.c +++ b/src/checks.c @@ -236,7 +236,7 @@ static void set_server_check_status(struct check *check, short 
status, const cha if (s->proxy->options2 & PR_O2_LOGHCHKS && (((check->health != 0) && (check->result & SRV_CHK_FAILED)) || - ((check->health != s->rise + s->fall - 1) && (check->result & SRV_CHK_PASSED)) || + ((check->health != check->rise + check->fall - 1) && (check->result & SRV_CHK_PASSED)) || ((s->state & SRV_GOINGDOWN) && !(check->result & SRV_CHK_DISABLE)) || (!(s->state & SRV_GOINGDOWN) && (check->result & SRV_CHK_DISABLE)))) { @@ -246,8 +246,8 @@ static void set_server_check_status(struct check *check, short status, const cha /* FIXME begin: calculate local version of the health/rise/fall/state */ health = check->health; - rise = s->rise; - fall = s->fall; + rise = check->rise; + fall = check->fall; state = s->state; if (check->result & SRV_CHK_FAILED) { @@ -401,10 +401,10 @@ void set_server_down(struct check *check) int xferred; if (s->state & SRV_MAINTAIN) { - check->health = s->rise; + check->health = check->rise; } - if ((s->state & SRV_RUNNING && check->health == s->rise) || s->track) { + if ((s->state & SRV_RUNNING && check->health == check->rise) || s->track) { int srv_was_paused = s->state & SRV_GOINGDOWN; int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act; @@ -468,11 +468,11 @@ void set_server_up(struct check *check) { unsigned int old_state = s->state; if (s->state & SRV_MAINTAIN) { - check->health = s->rise; + check->health = check->rise; } - if ((s->check.health >= s->rise && s->agent.health >= s->rise && - check->health == s->rise) || s->track) { + if ((s->check.health >= s->check.rise && s->agent.health >= s->agent.rise && + check->health == check->rise) || s->track) { if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) { if (s->proxy->last_change < now.tv_sec) // ignore negative times s->proxy->down_time += now.tv_sec - s->proxy->last_change; @@ -533,8 +533,8 @@ void set_server_up(struct check *check) { set_server_up(check); } - if (check->health >= s->rise) - check->health = s->rise + s->fall - 1; /* OK now */ + if (check->health 
>= check->rise) + check->health = check->rise + check->fall - 1; /* OK now */ } @@ -623,7 +623,7 @@ static void check_failed(struct check *check) if (check == &s->agent && check->status != HCHK_STATUS_L7STS) return; - if (check->health > s->rise) { + if (check->health > check->rise) { check->health--; /* still good */ s->counters.failed_checks++; } @@ -680,8 +680,8 @@ void health_adjust(struct server *s, short status) case HANA_ONERR_SUDDTH: /* simulate a pre-fatal failed health check */ - if (s->check.health > s->rise) - s->check.health = s->rise + 1; + if (s->check.health > s->check.rise) + s->check.health = s->check.rise + 1; /* no break - fall through */ @@ -694,7 +694,7 @@ void health_adjust(struct server *s, short status) case HANA_ONERR_MARKDWN: /* mark server down */ - s->check.health = s->rise; + s->check.health = s->check.rise; set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str); set_server_down(&s->check); @@ -734,7 +734,7 @@ static int httpchk_build_status_header(struct server *s, char *buffer) if (!(s->state & SRV_CHECKED)) sv_state = 6; /* should obviously never happen */ else if (s->state & SRV_RUNNING) { - if (s->check.health == s->rise + s->fall - 1) + if (s->check.health == s->check.rise + s->check.fall - 1) sv_state = 3; /* UP */ else sv_state = 2; /* going down */ @@ -750,8 +750,8 @@ static int httpchk_build_status_header(struct server *s, char *buffer) hlen += sprintf(buffer + hlen, srv_hlt_st[sv_state], - (s->state & SRV_RUNNING) ? (s->check.health - s->rise + 1) : (s->check.health), - (s->state & SRV_RUNNING) ? (s->fall) : (s->rise)); + (s->state & SRV_RUNNING) ? (s->check.health - s->check.rise + 1) : (s->check.health), + (s->state & SRV_RUNNING) ? 
(s->check.fall) : (s->check.rise)); hlen += sprintf(buffer + hlen, "; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d", s->proxy->id, s->id, @@ -1498,7 +1498,7 @@ static struct task *process_chk(struct task *t) set_server_disabled(check); } - if (check->health < s->rise + s->fall - 1) { + if (check->health < check->rise + check->fall - 1) { check->health++; /* was bad, stays for a while */ set_server_up(check); } diff --git a/src/dumpstats.c b/src/dumpstats.c index 99d16d7d8..3d60b09e0 100644 --- a/src/dumpstats.c +++ b/src/dumpstats.c @@ -1309,14 +1309,14 @@ static int stats_sock_parse_request(struct stream_interface *si, char *line) */ if (sv->track->state & SRV_RUNNING) { set_server_up(&sv->check); - sv->check.health = sv->rise; /* up, but will fall down at first failure */ + sv->check.health = sv->check.rise; /* up, but will fall down at first failure */ } else { sv->state &= ~SRV_MAINTAIN; set_server_down(&sv->check); } } else { set_server_up(&sv->check); - sv->check.health = sv->rise; /* up, but will fall down at first failure */ + sv->check.health = sv->check.rise; /* up, but will fall down at first failure */ } } @@ -2266,8 +2266,8 @@ static int stats_dump_sv_stats(struct stream_interface *si, struct proxy *px, in chunk_appendf(&trash, "%s ", human_time(now.tv_sec - ref->last_change, 1)); chunk_appendf(&trash, srv_hlt_st[state], - (ref->state & SRV_RUNNING) ? (ref->check.health - ref->rise + 1) : (ref->check.health), - (ref->state & SRV_RUNNING) ? (ref->fall) : (ref->rise)); + (ref->state & SRV_RUNNING) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health), + (ref->state & SRV_RUNNING) ? (ref->check.fall) : (ref->check.rise)); } if (sv->state & SRV_CHECKED) { @@ -2374,8 +2374,8 @@ static int stats_dump_sv_stats(struct stream_interface *si, struct proxy *px, in else chunk_appendf(&trash, srv_hlt_st[state], - (ref->state & SRV_RUNNING) ? (ref->check.health - ref->rise + 1) : (ref->check.health), - (ref->state & SRV_RUNNING) ? 
(ref->fall) : (ref->rise)); + (ref->state & SRV_RUNNING) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health), + (ref->state & SRV_RUNNING) ? (ref->check.fall) : (ref->check.rise)); chunk_appendf(&trash, /* weight, active, backup */ @@ -2944,7 +2944,7 @@ static int stats_dump_proxy_to_buffer(struct stream_interface *si, struct proxy if (!(svs->state & SRV_CHECKED)) sv_state = 6; else if (svs->state & SRV_RUNNING) { - if (svs->check.health == svs->rise + svs->fall - 1) + if (svs->check.health == svs->check.rise + svs->check.fall - 1) sv_state = 3; /* UP */ else sv_state = 2; /* going down */ diff --git a/src/proto_http.c b/src/proto_http.c index 5ad865b88..1921b9329 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -2920,7 +2920,7 @@ int http_process_req_stat_post(struct stream_interface *si, struct http_txn *txn if ((px->state != PR_STSTOPPED) && (sv->state & SRV_MAINTAIN)) { /* Already in maintenance, we can change the server state */ set_server_up(&sv->check); - sv->check.health = sv->rise; /* up, but will fall down at first failure */ + sv->check.health = sv->check.rise; /* up, but will fall down at first failure */ altered_servers++; total_servers++; } diff --git a/src/server.c b/src/server.c index efba25714..a316daa5c 100644 --- a/src/server.c +++ b/src/server.c @@ -34,7 +34,7 @@ int srv_getinter(const struct check *check) { const struct server *s = check->server; - if ((s->state & SRV_CHECKED) && (check->health == s->rise + s->fall - 1)) + if ((s->state & SRV_CHECKED) && (check->health == check->rise + check->fall - 1)) return check->inter; if (!(s->state & SRV_RUNNING) && check->health == 0)