mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-01-05 19:52:14 +00:00
MEDIUM: Set rise and fall of agent checks to 1
This is achieved by moving rise and fall from struct server to struct check. After this move, the behaviour of the primary check, server->check, is unchanged. However, the secondary agent check, server->agent, now has independent rise and fall values, each of which is set to 1. The result is that receiving "fail", "stopped" or "down" just once from the agent will mark the server as down. And receiving a weight just once will allow the server to be marked up if its primary check is in good health. This opens up the scope to allow the rise and fall values of the agent check to be configurable; however, this has not been implemented at this stage. Signed-off-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
parent
2f1f955c8c
commit
58c32978b2
@ -127,6 +127,8 @@
|
||||
#define DEF_CHKINTR 2000
|
||||
#define DEF_FALLTIME 3
|
||||
#define DEF_RISETIME 2
|
||||
#define DEF_AGENT_FALLTIME 1
|
||||
#define DEF_AGENT_RISETIME 1
|
||||
#define DEF_CHECK_REQ "OPTIONS / HTTP/1.0\r\n"
|
||||
#define DEF_SMTP_CHECK_REQ "HELO localhost\r\n"
|
||||
#define DEF_LDAP_CHECK_REQ "\x30\x0c\x02\x01\x01\x60\x07\x02\x01\x03\x04\x00\x80\x00"
|
||||
|
@ -123,8 +123,9 @@ struct check {
|
||||
int inter, fastinter, downinter; /* checks: time in milliseconds */
|
||||
int result; /* health-check result : SRV_CHK_* */
|
||||
int state; /* health-check result : CHK_* */
|
||||
int health; /* 0 to server->rise-1 = bad;
|
||||
* rise to server->rise+server->fall-1 = good */
|
||||
int health; /* 0 to rise-1 = bad;
|
||||
* rise to rise+fall-1 = good */
|
||||
int rise, fall; /* time in iterations */
|
||||
int type; /* Check type, one of PR_O2_*_CHK */
|
||||
struct server *server; /* back-pointer to server */
|
||||
};
|
||||
@ -157,7 +158,6 @@ struct server {
|
||||
struct server *tracknext, *track; /* next server in a tracking list, tracked server */
|
||||
char *trackit; /* temporary variable to make assignment deferrable */
|
||||
int consecutive_errors; /* current number of consecutive errors */
|
||||
int rise, fall; /* time in iterations */
|
||||
int consecutive_errors_limit; /* number of consecutive errors that triggers an event */
|
||||
short observe, onerror; /* observing mode: one of HANA_OBS_*; what to do on error: one of HANA_ONERR_* */
|
||||
short onmarkeddown; /* what to do when marked down: one of HANA_ONMARKEDDOWN_* */
|
||||
|
@ -1328,8 +1328,10 @@ void init_default_instance()
|
||||
defproxy.defsrv.agent.inter = DEF_CHKINTR;
|
||||
defproxy.defsrv.agent.fastinter = 0;
|
||||
defproxy.defsrv.agent.downinter = 0;
|
||||
defproxy.defsrv.rise = DEF_RISETIME;
|
||||
defproxy.defsrv.fall = DEF_FALLTIME;
|
||||
defproxy.defsrv.check.rise = DEF_RISETIME;
|
||||
defproxy.defsrv.check.fall = DEF_FALLTIME;
|
||||
defproxy.defsrv.agent.rise = DEF_AGENT_RISETIME;
|
||||
defproxy.defsrv.agent.fall = DEF_AGENT_FALLTIME;
|
||||
defproxy.defsrv.check.port = 0;
|
||||
defproxy.defsrv.agent.port = 0;
|
||||
defproxy.defsrv.maxqueue = 0;
|
||||
@ -4287,8 +4289,6 @@ stats_error_parsing:
|
||||
newsrv->agent.inter = curproxy->defsrv.agent.inter;
|
||||
newsrv->agent.fastinter = curproxy->defsrv.agent.fastinter;
|
||||
newsrv->agent.downinter = curproxy->defsrv.agent.downinter;
|
||||
newsrv->rise = curproxy->defsrv.rise;
|
||||
newsrv->fall = curproxy->defsrv.fall;
|
||||
newsrv->maxqueue = curproxy->defsrv.maxqueue;
|
||||
newsrv->minconn = curproxy->defsrv.minconn;
|
||||
newsrv->maxconn = curproxy->defsrv.maxconn;
|
||||
@ -4303,11 +4303,15 @@ stats_error_parsing:
|
||||
= curproxy->defsrv.iweight;
|
||||
|
||||
newsrv->check.status = HCHK_STATUS_INI;
|
||||
newsrv->check.health = newsrv->rise; /* up, but will fall down at first failure */
|
||||
newsrv->check.rise = curproxy->defsrv.check.rise;
|
||||
newsrv->check.fall = curproxy->defsrv.check.fall;
|
||||
newsrv->check.health = newsrv->check.rise; /* up, but will fall down at first failure */
|
||||
newsrv->check.server = newsrv;
|
||||
|
||||
newsrv->agent.status = HCHK_STATUS_INI;
|
||||
newsrv->agent.health = newsrv->rise; /* up, but will fall down at first failure */
|
||||
newsrv->agent.rise = curproxy->defsrv.agent.rise;
|
||||
newsrv->agent.fall = curproxy->defsrv.agent.fall;
|
||||
newsrv->agent.health = newsrv->agent.rise; /* up, but will fall down at first failure */
|
||||
newsrv->agent.server = newsrv;
|
||||
|
||||
cur_arg = 3;
|
||||
@ -4361,8 +4365,8 @@ stats_error_parsing:
|
||||
goto out;
|
||||
}
|
||||
|
||||
newsrv->rise = atol(args[cur_arg + 1]);
|
||||
if (newsrv->rise <= 0) {
|
||||
newsrv->check.rise = atol(args[cur_arg + 1]);
|
||||
if (newsrv->check.rise <= 0) {
|
||||
Alert("parsing [%s:%d]: '%s' has to be > 0.\n",
|
||||
file, linenum, args[cur_arg]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
@ -4370,13 +4374,11 @@ stats_error_parsing:
|
||||
}
|
||||
|
||||
if (newsrv->check.health)
|
||||
newsrv->check.health = newsrv->rise;
|
||||
if (newsrv->agent.health)
|
||||
newsrv->agent.health = newsrv->rise;
|
||||
newsrv->check.health = newsrv->check.rise;
|
||||
cur_arg += 2;
|
||||
}
|
||||
else if (!strcmp(args[cur_arg], "fall")) {
|
||||
newsrv->fall = atol(args[cur_arg + 1]);
|
||||
newsrv->check.fall = atol(args[cur_arg + 1]);
|
||||
|
||||
if (!*args[cur_arg + 1]) {
|
||||
Alert("parsing [%s:%d]: '%s' expects an integer argument.\n",
|
||||
@ -4385,7 +4387,7 @@ stats_error_parsing:
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (newsrv->fall <= 0) {
|
||||
if (newsrv->check.fall <= 0) {
|
||||
Alert("parsing [%s:%d]: '%s' has to be > 0.\n",
|
||||
file, linenum, args[cur_arg]);
|
||||
err_code |= ERR_ALERT | ERR_FATAL;
|
||||
|
36
src/checks.c
36
src/checks.c
@ -236,7 +236,7 @@ static void set_server_check_status(struct check *check, short status, const cha
|
||||
|
||||
if (s->proxy->options2 & PR_O2_LOGHCHKS &&
|
||||
(((check->health != 0) && (check->result & SRV_CHK_FAILED)) ||
|
||||
((check->health != s->rise + s->fall - 1) && (check->result & SRV_CHK_PASSED)) ||
|
||||
((check->health != check->rise + check->fall - 1) && (check->result & SRV_CHK_PASSED)) ||
|
||||
((s->state & SRV_GOINGDOWN) && !(check->result & SRV_CHK_DISABLE)) ||
|
||||
(!(s->state & SRV_GOINGDOWN) && (check->result & SRV_CHK_DISABLE)))) {
|
||||
|
||||
@ -246,8 +246,8 @@ static void set_server_check_status(struct check *check, short status, const cha
|
||||
|
||||
/* FIXME begin: calculate local version of the health/rise/fall/state */
|
||||
health = check->health;
|
||||
rise = s->rise;
|
||||
fall = s->fall;
|
||||
rise = check->rise;
|
||||
fall = check->fall;
|
||||
state = s->state;
|
||||
|
||||
if (check->result & SRV_CHK_FAILED) {
|
||||
@ -401,10 +401,10 @@ void set_server_down(struct check *check)
|
||||
int xferred;
|
||||
|
||||
if (s->state & SRV_MAINTAIN) {
|
||||
check->health = s->rise;
|
||||
check->health = check->rise;
|
||||
}
|
||||
|
||||
if ((s->state & SRV_RUNNING && check->health == s->rise) || s->track) {
|
||||
if ((s->state & SRV_RUNNING && check->health == check->rise) || s->track) {
|
||||
int srv_was_paused = s->state & SRV_GOINGDOWN;
|
||||
int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
|
||||
|
||||
@ -468,11 +468,11 @@ void set_server_up(struct check *check) {
|
||||
unsigned int old_state = s->state;
|
||||
|
||||
if (s->state & SRV_MAINTAIN) {
|
||||
check->health = s->rise;
|
||||
check->health = check->rise;
|
||||
}
|
||||
|
||||
if ((s->check.health >= s->rise && s->agent.health >= s->rise &&
|
||||
check->health == s->rise) || s->track) {
|
||||
if ((s->check.health >= s->check.rise && s->agent.health >= s->agent.rise &&
|
||||
check->health == check->rise) || s->track) {
|
||||
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
|
||||
if (s->proxy->last_change < now.tv_sec) // ignore negative times
|
||||
s->proxy->down_time += now.tv_sec - s->proxy->last_change;
|
||||
@ -533,8 +533,8 @@ void set_server_up(struct check *check) {
|
||||
set_server_up(check);
|
||||
}
|
||||
|
||||
if (check->health >= s->rise)
|
||||
check->health = s->rise + s->fall - 1; /* OK now */
|
||||
if (check->health >= check->rise)
|
||||
check->health = check->rise + check->fall - 1; /* OK now */
|
||||
|
||||
}
|
||||
|
||||
@ -623,7 +623,7 @@ static void check_failed(struct check *check)
|
||||
if (check == &s->agent && check->status != HCHK_STATUS_L7STS)
|
||||
return;
|
||||
|
||||
if (check->health > s->rise) {
|
||||
if (check->health > check->rise) {
|
||||
check->health--; /* still good */
|
||||
s->counters.failed_checks++;
|
||||
}
|
||||
@ -680,8 +680,8 @@ void health_adjust(struct server *s, short status)
|
||||
|
||||
case HANA_ONERR_SUDDTH:
|
||||
/* simulate a pre-fatal failed health check */
|
||||
if (s->check.health > s->rise)
|
||||
s->check.health = s->rise + 1;
|
||||
if (s->check.health > s->check.rise)
|
||||
s->check.health = s->check.rise + 1;
|
||||
|
||||
/* no break - fall through */
|
||||
|
||||
@ -694,7 +694,7 @@ void health_adjust(struct server *s, short status)
|
||||
|
||||
case HANA_ONERR_MARKDWN:
|
||||
/* mark server down */
|
||||
s->check.health = s->rise;
|
||||
s->check.health = s->check.rise;
|
||||
set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
|
||||
set_server_down(&s->check);
|
||||
|
||||
@ -734,7 +734,7 @@ static int httpchk_build_status_header(struct server *s, char *buffer)
|
||||
if (!(s->state & SRV_CHECKED))
|
||||
sv_state = 6; /* should obviously never happen */
|
||||
else if (s->state & SRV_RUNNING) {
|
||||
if (s->check.health == s->rise + s->fall - 1)
|
||||
if (s->check.health == s->check.rise + s->check.fall - 1)
|
||||
sv_state = 3; /* UP */
|
||||
else
|
||||
sv_state = 2; /* going down */
|
||||
@ -750,8 +750,8 @@ static int httpchk_build_status_header(struct server *s, char *buffer)
|
||||
|
||||
hlen += sprintf(buffer + hlen,
|
||||
srv_hlt_st[sv_state],
|
||||
(s->state & SRV_RUNNING) ? (s->check.health - s->rise + 1) : (s->check.health),
|
||||
(s->state & SRV_RUNNING) ? (s->fall) : (s->rise));
|
||||
(s->state & SRV_RUNNING) ? (s->check.health - s->check.rise + 1) : (s->check.health),
|
||||
(s->state & SRV_RUNNING) ? (s->check.fall) : (s->check.rise));
|
||||
|
||||
hlen += sprintf(buffer + hlen, "; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
|
||||
s->proxy->id, s->id,
|
||||
@ -1498,7 +1498,7 @@ static struct task *process_chk(struct task *t)
|
||||
set_server_disabled(check);
|
||||
}
|
||||
|
||||
if (check->health < s->rise + s->fall - 1) {
|
||||
if (check->health < check->rise + check->fall - 1) {
|
||||
check->health++; /* was bad, stays for a while */
|
||||
set_server_up(check);
|
||||
}
|
||||
|
@ -1309,14 +1309,14 @@ static int stats_sock_parse_request(struct stream_interface *si, char *line)
|
||||
*/
|
||||
if (sv->track->state & SRV_RUNNING) {
|
||||
set_server_up(&sv->check);
|
||||
sv->check.health = sv->rise; /* up, but will fall down at first failure */
|
||||
sv->check.health = sv->check.rise; /* up, but will fall down at first failure */
|
||||
} else {
|
||||
sv->state &= ~SRV_MAINTAIN;
|
||||
set_server_down(&sv->check);
|
||||
}
|
||||
} else {
|
||||
set_server_up(&sv->check);
|
||||
sv->check.health = sv->rise; /* up, but will fall down at first failure */
|
||||
sv->check.health = sv->check.rise; /* up, but will fall down at first failure */
|
||||
}
|
||||
}
|
||||
|
||||
@ -2266,8 +2266,8 @@ static int stats_dump_sv_stats(struct stream_interface *si, struct proxy *px, in
|
||||
chunk_appendf(&trash, "%s ", human_time(now.tv_sec - ref->last_change, 1));
|
||||
chunk_appendf(&trash,
|
||||
srv_hlt_st[state],
|
||||
(ref->state & SRV_RUNNING) ? (ref->check.health - ref->rise + 1) : (ref->check.health),
|
||||
(ref->state & SRV_RUNNING) ? (ref->fall) : (ref->rise));
|
||||
(ref->state & SRV_RUNNING) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health),
|
||||
(ref->state & SRV_RUNNING) ? (ref->check.fall) : (ref->check.rise));
|
||||
}
|
||||
|
||||
if (sv->state & SRV_CHECKED) {
|
||||
@ -2374,8 +2374,8 @@ static int stats_dump_sv_stats(struct stream_interface *si, struct proxy *px, in
|
||||
else
|
||||
chunk_appendf(&trash,
|
||||
srv_hlt_st[state],
|
||||
(ref->state & SRV_RUNNING) ? (ref->check.health - ref->rise + 1) : (ref->check.health),
|
||||
(ref->state & SRV_RUNNING) ? (ref->fall) : (ref->rise));
|
||||
(ref->state & SRV_RUNNING) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health),
|
||||
(ref->state & SRV_RUNNING) ? (ref->check.fall) : (ref->check.rise));
|
||||
|
||||
chunk_appendf(&trash,
|
||||
/* weight, active, backup */
|
||||
@ -2944,7 +2944,7 @@ static int stats_dump_proxy_to_buffer(struct stream_interface *si, struct proxy
|
||||
if (!(svs->state & SRV_CHECKED))
|
||||
sv_state = 6;
|
||||
else if (svs->state & SRV_RUNNING) {
|
||||
if (svs->check.health == svs->rise + svs->fall - 1)
|
||||
if (svs->check.health == svs->check.rise + svs->check.fall - 1)
|
||||
sv_state = 3; /* UP */
|
||||
else
|
||||
sv_state = 2; /* going down */
|
||||
|
@ -2920,7 +2920,7 @@ int http_process_req_stat_post(struct stream_interface *si, struct http_txn *txn
|
||||
if ((px->state != PR_STSTOPPED) && (sv->state & SRV_MAINTAIN)) {
|
||||
/* Already in maintenance, we can change the server state */
|
||||
set_server_up(&sv->check);
|
||||
sv->check.health = sv->rise; /* up, but will fall down at first failure */
|
||||
sv->check.health = sv->check.rise; /* up, but will fall down at first failure */
|
||||
altered_servers++;
|
||||
total_servers++;
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ int srv_getinter(const struct check *check)
|
||||
{
|
||||
const struct server *s = check->server;
|
||||
|
||||
if ((s->state & SRV_CHECKED) && (check->health == s->rise + s->fall - 1))
|
||||
if ((s->state & SRV_CHECKED) && (check->health == check->rise + check->fall - 1))
|
||||
return check->inter;
|
||||
|
||||
if (!(s->state & SRV_RUNNING) && check->health == 0)
|
||||
|
Loading…
Reference in New Issue
Block a user