mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2024-12-17 00:44:33 +00:00
MAJOR: agent: rework the response processing and support additional actions
We now retrieve a lot of information from a single line of response, which can be made up of various words delimited by spaces/tabs/commas. We try to arrange all this and report whatever unusual we detect. The agent now supports : - "up", "down", "stopped", "fail" for the operational states - "ready", "drain", "maint" for the administrative states - any "%" number for the weight - an optional reason after a "#" that can be reported on the stats page The line parser and processor should move to its own function so that we can reuse the exact same one for http-based agent checks later.
This commit is contained in:
parent
cf2924bc25
commit
81f5d94a0b
@ -8469,40 +8469,56 @@ addr <ipv4|ipv6>
|
||||
|
||||
agent-check
|
||||
Enable an auxiliary agent check which is run independently of a regular
|
||||
health check. An agent health check is performed by making a TCP
|
||||
connection to the port set by the "agent-port" parameter" and reading
|
||||
an ASCII string. The string should have one of the following forms:
|
||||
|
||||
* An ASCII representation of an positive integer percentage.
|
||||
e.g. "75%"
|
||||
health check. An agent health check is performed by making a TCP connection
|
||||
to the port set by the "agent-port" parameter and reading an ASCII string.
|
||||
The string is made of a series of words delimited by spaces, tabs or commas
|
||||
in any order, optionally terminated by '\r' and/or '\n', each consisting of :
|
||||
|
||||
- An ASCII representation of a positive integer percentage, e.g. "75%".
|
||||
Values in this format will set the weight proportional to the initial
|
||||
weight of a server as configured when haproxy starts.
|
||||
|
||||
* The string "drain".
|
||||
- The word "ready". This will turn the server's administrative state to the
|
||||
READY mode, thus cancelling any DRAIN or MAINT state
|
||||
|
||||
This will cause the weight of a server to be set to 0, and thus it will
|
||||
not accept any new connections other than those that are accepted via
|
||||
persistence.
|
||||
- The word "drain". This will turn the server's administrative state to the
|
||||
DRAIN mode, thus it will not accept any new connections other than those
|
||||
that are accepted via persistence.
|
||||
|
||||
* The string "down", optionally followed by a description string.
|
||||
- The word "maint". This will turn the server's administrative state to the
|
||||
MAINT mode, thus it will not accept any new connections at all, and health
|
||||
checks will be stopped.
|
||||
|
||||
Mark the server as down and log the description string as the reason.
|
||||
- The words "down", "failed", or "stopped", optionally followed by a
|
||||
description string after a sharp ('#'). All of these mark the server's
|
||||
operating state as DOWN, but since the word itself is reported on the stats
|
||||
page, the difference allows an administrator to know if the situation was
|
||||
expected or not : the service may intentionally be stopped, may appear up
|
||||
but fail some validity tests, or may be seen as down (eg: missing process,
|
||||
or port not responding).
|
||||
|
||||
* The string "stopped", optionally followed by a description string.
|
||||
- The word "up" sets back the server's operating state as UP if health checks
|
||||
also report that the service is accessible.
|
||||
|
||||
This currently has the same behaviour as "down".
|
||||
|
||||
* The string "fail", optionally followed by a description string.
|
||||
|
||||
This currently has the same behaviour as "down".
|
||||
Parameters which are not advertised by the agent are not changed. For
|
||||
example, an agent might be designed to monitor CPU usage and only report a
|
||||
relative weight and never interact with the operating status. Similarly, an
|
||||
agent could be designed as an end-user interface with 3 radio buttons
|
||||
allowing an administrator to change only the administrative state. However,
|
||||
it is important to consider that only the agent may revert its own actions,
|
||||
so if a server is set to DRAIN mode or to DOWN state using the agent, the
|
||||
agent must implement the other equivalent actions to bring the service into
|
||||
operations again.
|
||||
|
||||
Failure to connect to the agent is not considered an error as connectivity
|
||||
is tested by the regular health check which is enabled by the "check"
|
||||
parameter.
|
||||
parameter. Warning though, it is not a good idea to stop an agent after it
|
||||
reports "down", since only an agent reporting "up" will be able to turn the
|
||||
server up again. Note that the CLI on the Unix stats socket is also able to
|
||||
force an agent's result in order to workaround a bogus agent if needed.
|
||||
|
||||
Requires the ""agent-port" parameter to be set.
|
||||
See also the "agent-check" parameter.
|
||||
Requires the "agent-port" parameter to be set. See also the "agent-inter"
|
||||
parameter.
|
||||
|
||||
Supported in default-server: No
|
||||
|
||||
|
214
src/checks.c
214
src/checks.c
@ -880,13 +880,38 @@ static void event_srv_chk_r(struct connection *conn)
|
||||
break;
|
||||
|
||||
case PR_O2_LB_AGENT_CHK: {
|
||||
short status = HCHK_STATUS_L7RSP;
|
||||
const char *desc = "Unknown feedback string";
|
||||
const char *down_cmd = NULL;
|
||||
int disabled;
|
||||
char *p;
|
||||
int status = HCHK_STATUS_CHECKED;
|
||||
const char *hs = NULL; /* health status */
|
||||
const char *as = NULL; /* admin status */
|
||||
const char *ps = NULL; /* performance status */
|
||||
const char *err = NULL; /* first error to report */
|
||||
const char *wrn = NULL; /* first warning to report */
|
||||
char *cmd, *p;
|
||||
|
||||
/* We're getting an agent check response. The agent could
|
||||
* have been disabled in the mean time with a long check
|
||||
* still pending. It is important that we ignore the whole
|
||||
* response.
|
||||
*/
|
||||
if (!(check->server->agent.state & CHK_ST_ENABLED))
|
||||
break;
|
||||
|
||||
/* The agent supports strings made of a single line ended by the
|
||||
* first CR ('\r') or LF ('\n'). This line is composed of words
|
||||
* delimited by spaces (' '), tabs ('\t'), or commas (','). The
|
||||
* line may optionally contained a description of a state change
|
||||
* after a sharp ('#'), which is only considered if a health state
|
||||
* is announced.
|
||||
*
|
||||
* Words may be composed of :
|
||||
* - a numeric weight suffixed by the percent character ('%').
|
||||
* - a health status among "up", "down", "stopped", and "fail".
|
||||
* - an admin status among "ready", "drain", "maint".
|
||||
*
|
||||
* These words may appear in any order. If multiple words of the
|
||||
* same category appear, the last one wins.
|
||||
*/
|
||||
|
||||
/* get a complete line first */
|
||||
p = check->bi->data;
|
||||
while (*p && *p != '\n' && *p != '\r')
|
||||
p++;
|
||||
@ -899,57 +924,148 @@ static void event_srv_chk_r(struct connection *conn)
|
||||
set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
|
||||
break;
|
||||
}
|
||||
|
||||
*p = 0;
|
||||
cmd = check->bi->data;
|
||||
|
||||
/*
|
||||
* The agent may have been disabled after a check was
|
||||
* initialised. If so, ignore weight changes and drain
|
||||
* settings from the agent. Note that the setting is
|
||||
* always present in the state of the agent the server,
|
||||
* regardless of if the agent is being run as a primary or
|
||||
* secondary check. That is, regardless of if the check
|
||||
* parameter of this function is the agent or check field
|
||||
* of the server.
|
||||
*/
|
||||
disabled = !(check->server->agent.state & CHK_ST_ENABLED);
|
||||
|
||||
if (strchr(check->bi->data, '%')) {
|
||||
if (disabled)
|
||||
break;
|
||||
desc = server_parse_weight_change_request(s, check->bi->data);
|
||||
if (!desc) {
|
||||
status = HCHK_STATUS_L7OKD;
|
||||
desc = check->bi->data;
|
||||
while (*cmd) {
|
||||
/* look for next word */
|
||||
if (*cmd == ' ' || *cmd == '\t' || *cmd == ',') {
|
||||
cmd++;
|
||||
continue;
|
||||
}
|
||||
} else if (!strcasecmp(check->bi->data, "drain")) {
|
||||
if (disabled)
|
||||
break;
|
||||
desc = server_parse_weight_change_request(s, "0%");
|
||||
if (!desc) {
|
||||
desc = "drain";
|
||||
status = HCHK_STATUS_L7OKD;
|
||||
}
|
||||
} else if (!strncasecmp(check->bi->data, "down", strlen("down"))) {
|
||||
down_cmd = "down";
|
||||
} else if (!strncasecmp(check->bi->data, "stopped", strlen("stopped"))) {
|
||||
down_cmd = "stopped";
|
||||
} else if (!strncasecmp(check->bi->data, "fail", strlen("fail"))) {
|
||||
down_cmd = "fail";
|
||||
}
|
||||
|
||||
if (down_cmd) {
|
||||
const char *end = check->bi->data + strlen(down_cmd);
|
||||
/*
|
||||
* The command keyword must terminated the string or
|
||||
* be followed by a blank.
|
||||
if (*cmd == '#') {
|
||||
/* this is the beginning of a health status description,
|
||||
* skip the sharp and blanks.
|
||||
*/
|
||||
cmd++;
|
||||
while (*cmd == '\t' || *cmd == ' ')
|
||||
cmd++;
|
||||
break;
|
||||
}
|
||||
|
||||
/* find the end of the word so that we have a null-terminated
|
||||
* word between <cmd> and <p>.
|
||||
*/
|
||||
if (end[0] == '\0' || end[0] == ' ' || end[0] == '\t') {
|
||||
status = HCHK_STATUS_L7STS;
|
||||
desc = check->bi->data;
|
||||
p = cmd + 1;
|
||||
while (*p && *p != '\t' && *p != ' ' && *p != '\n' && *p != ',')
|
||||
p++;
|
||||
if (*p)
|
||||
*p++ = 0;
|
||||
|
||||
/* first, health statuses */
|
||||
if (strcasecmp(cmd, "up") == 0) {
|
||||
check->health = check->rise + check->fall - 1;
|
||||
status = HCHK_STATUS_L7OKD;
|
||||
hs = cmd;
|
||||
}
|
||||
else if (strcasecmp(cmd, "down") == 0) {
|
||||
check->health = 0;
|
||||
status = HCHK_STATUS_L7STS;
|
||||
hs = cmd;
|
||||
}
|
||||
else if (strcasecmp(cmd, "stopped") == 0) {
|
||||
check->health = 0;
|
||||
status = HCHK_STATUS_L7STS;
|
||||
hs = cmd;
|
||||
}
|
||||
else if (strcasecmp(cmd, "fail") == 0) {
|
||||
check->health = 0;
|
||||
status = HCHK_STATUS_L7STS;
|
||||
hs = cmd;
|
||||
}
|
||||
/* admin statuses */
|
||||
else if (strcasecmp(cmd, "ready") == 0) {
|
||||
as = cmd;
|
||||
}
|
||||
else if (strcasecmp(cmd, "drain") == 0) {
|
||||
as = cmd;
|
||||
}
|
||||
else if (strcasecmp(cmd, "maint") == 0) {
|
||||
as = cmd;
|
||||
}
|
||||
/* else try to parse a weight here and keep the last one */
|
||||
else if (isdigit((unsigned char)*cmd) && strchr(cmd, '%') != NULL) {
|
||||
ps = cmd;
|
||||
}
|
||||
else {
|
||||
/* keep a copy of the first error */
|
||||
if (!err)
|
||||
err = cmd;
|
||||
}
|
||||
/* skip to next word */
|
||||
cmd = p;
|
||||
}
|
||||
/* here, cmd points either to \0 or to the beginning of a
|
||||
* description. Skip possible leading spaces.
|
||||
*/
|
||||
while (*cmd == ' ' || *cmd == '\n')
|
||||
cmd++;
|
||||
|
||||
/* First, update the admin status so that we avoid sending other
|
||||
* possibly useless warnings and can also update the health if
|
||||
* present after going back up.
|
||||
*/
|
||||
if (as) {
|
||||
if (strcasecmp(as, "drain") == 0)
|
||||
srv_adm_set_drain(check->server);
|
||||
else if (strcasecmp(as, "maint") == 0)
|
||||
srv_adm_set_maint(check->server);
|
||||
else
|
||||
srv_adm_set_ready(check->server);
|
||||
}
|
||||
|
||||
set_server_check_status(check, status, desc);
|
||||
/* now change weights */
|
||||
if (ps) {
|
||||
const char *msg;
|
||||
|
||||
msg = server_parse_weight_change_request(s, ps);
|
||||
if (!wrn || !*wrn)
|
||||
wrn = msg;
|
||||
}
|
||||
|
||||
/* and finally health status */
|
||||
if (hs) {
|
||||
/* We'll report some of the warnings and errors we have
|
||||
* here. Down reports are critical, we leave them untouched.
|
||||
* Lack of report, or report of 'UP' leaves the room for
|
||||
* ERR first, then WARN.
|
||||
*/
|
||||
const char *msg = cmd;
|
||||
struct chunk *t;
|
||||
|
||||
if (!*msg || status == HCHK_STATUS_L7OKD) {
|
||||
if (err && *err)
|
||||
msg = err;
|
||||
else if (wrn && *wrn)
|
||||
msg = wrn;
|
||||
}
|
||||
|
||||
t = get_trash_chunk();
|
||||
chunk_printf(t, "via agent : %s%s%s%s",
|
||||
hs, *msg ? " (" : "",
|
||||
msg, *msg ? ")" : "");
|
||||
|
||||
set_server_check_status(check, status, t->str);
|
||||
}
|
||||
else if (err && *err) {
|
||||
/* No status change but we'd like to report something odd.
|
||||
* Just report the current state and copy the message.
|
||||
*/
|
||||
chunk_printf(&trash, "agent reports an error : %s", err);
|
||||
set_server_check_status(check, status/*check->status*/, trash.str);
|
||||
|
||||
}
|
||||
else if (wrn && *wrn) {
|
||||
/* No status change but we'd like to report something odd.
|
||||
* Just report the current state and copy the message.
|
||||
*/
|
||||
chunk_printf(&trash, "agent warns : %s", wrn);
|
||||
set_server_check_status(check, status/*check->status*/, trash.str);
|
||||
}
|
||||
else
|
||||
set_server_check_status(check, status, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user