mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2024-12-15 07:54:33 +00:00
MEDIUM: global: Add a "close-spread-time" option to spread soft-stop on time window
The new 'close-spread-time' global option can be used to spread idle and active HTTP connection closing after a SIGUSR1 signal is received. This allows to limit bursts of reconnections when too many idle connections are closed at once. Indeed, without this new mechanism, in case of soft-stop, all the idle connections would be closed at once (after the grace period is over), and all active HTTP connections would be closed by appending a "Connection: close" header to the next response that goes over it (or via a GOAWAY frame in case of HTTP2). This patch adds the support of this new option for HTTP as well as HTTP2 connections. It works differently on active and idle connections. On active connections, instead of sending systematically the GOAWAY frame or adding the 'Connection: close' header like before once the soft-stop has started, a random based on the remainder of the close window is calculated, and depending on its result we could decide to keep the connection alive. The random will be recalculated for any subsequent request/response on this connection so the GOAWAY will still end up being sent, but we might wait a few more round trips. This will ensure that goaways are distributed along a longer time window than before. On idle connections, a random factor is used when determining the expire field of the connection's task, which should naturally spread connection closings on the time window (see h2c_update_timeout). This feature request was described in GitHub issue #1614. This patch should be backported to 2.5. It depends on "BUG/MEDIUM: mux-h2: make use of http-request and keep-alive timeouts" which refactored the timeout management of HTTP2 connections.
This commit is contained in:
parent
4e4b813bde
commit
b5d968d9b2
@ -1155,6 +1155,26 @@ chroot <jail dir>
|
||||
with superuser privileges. It is important to ensure that <jail_dir> is both
|
||||
empty and non-writable to anyone.
|
||||
|
||||
close-spread-time <time>
|
||||
Define a time window during which idle connections and active connections
|
||||
closing is spread in case of soft-stop. After a SIGUSR1 is received and the
|
||||
grace period is over (if any), the idle connections will all be closed at
|
||||
once if this option is not set, and active HTTP or HTTP2 connections will be
|
||||
ended after the next request is received, either by appending a "Connection:
|
||||
close" line to the HTTP response, or by sending a GOAWAY frame in case of
|
||||
HTTP2. When this option is set, connection closing will be spread over this
|
||||
set <time>.
|
||||
|
||||
Arguments :
|
||||
<time> is a time window (by default in milliseconds) during which
|
||||
connection closing will be spread during a soft-stop operation.
|
||||
|
||||
It is recommended to set this setting to a value lower than the one used in
|
||||
the "hard-stop-after" option if this one is used, so that all connections
|
||||
have a chance to gracefully close before the process stops.
|
||||
|
||||
See also: grace, hard-stop-after
|
||||
|
||||
cpu-map [auto:]<process-set>[/<thread-set>] <cpu-set>...
|
||||
On some operating systems, it is possible to bind a process or a thread to a
|
||||
specific CPU set. This means that the process or the thread will never run on
|
||||
|
@ -98,6 +98,8 @@ struct global {
|
||||
int mode;
|
||||
unsigned int hard_stop_after; /* maximum time allowed to perform a soft-stop */
|
||||
unsigned int grace_delay; /* grace delay between SIGUSR1 and soft-stop */
|
||||
unsigned int close_spread_time; /* time window during which connection closing is spread */
|
||||
unsigned int close_spread_end; /* end of close spread window */
|
||||
int maxconn, hardmaxconn;
|
||||
int maxsslconn;
|
||||
int ssl_session_max_cost; /* how many bytes an SSL session may cost */
|
||||
|
@ -169,6 +169,8 @@ volatile unsigned long stopping_thread_mask = 0; /* Threads acknowledged stoppin
|
||||
/* global options */
|
||||
struct global global = {
|
||||
.hard_stop_after = TICK_ETERNITY,
|
||||
.close_spread_time = TICK_ETERNITY,
|
||||
.close_spread_end = TICK_ETERNITY,
|
||||
.numa_cpu_mapping = 1,
|
||||
.nbthread = 0,
|
||||
.req_count = 0,
|
||||
|
58
src/mux_h1.c
58
src/mux_h1.c
@ -591,6 +591,8 @@ static int h1_avail_streams(struct connection *conn)
|
||||
/* Refresh the h1c task timeout if necessary */
|
||||
static void h1_refresh_timeout(struct h1c *h1c)
|
||||
{
|
||||
int is_idle_conn = 0;
|
||||
|
||||
if (h1c->task) {
|
||||
if (!(h1c->flags & H1C_F_ST_ALIVE) || (h1c->flags & H1C_F_ST_SHUTDOWN)) {
|
||||
/* half-closed or dead connections : switch to clientfin/serverfin
|
||||
@ -599,6 +601,7 @@ static void h1_refresh_timeout(struct h1c *h1c)
|
||||
*/
|
||||
h1c->task->expire = tick_add(now_ms, h1c->shut_timeout);
|
||||
TRACE_DEVEL("refreshing connection's timeout (dead or half-closed)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
|
||||
is_idle_conn = 1;
|
||||
}
|
||||
else if (b_data(&h1c->obuf)) {
|
||||
/* connection with pending outgoing data, need a timeout (server or client). */
|
||||
@ -609,6 +612,10 @@ static void h1_refresh_timeout(struct h1c *h1c)
|
||||
/* front connections waiting for a fully usable stream need a timeout. */
|
||||
h1c->task->expire = tick_add(now_ms, h1c->timeout);
|
||||
TRACE_DEVEL("refreshing connection's timeout (alive front h1c but not ready)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
|
||||
/* A frontend connection not yet ready could be treated the same way as an idle
|
||||
* one in case of soft-close.
|
||||
*/
|
||||
is_idle_conn = 1;
|
||||
}
|
||||
else {
|
||||
/* alive back connections of front connections with a conn-stream attached */
|
||||
@ -618,6 +625,36 @@ static void h1_refresh_timeout(struct h1c *h1c)
|
||||
|
||||
/* Finally set the idle expiration date if shorter */
|
||||
h1c->task->expire = tick_first(h1c->task->expire, h1c->idle_exp);
|
||||
|
||||
if ((h1c->px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) &&
|
||||
is_idle_conn && tick_isset(global.close_spread_end)) {
|
||||
/* If a soft-stop is in progress and a close-spread-time
|
||||
* is set, we want to spread idle connection closing roughly
|
||||
* evenly across the defined window. This should only
|
||||
* act on idle frontend connections.
|
||||
* If the window end is already in the past, we wake the
|
||||
* timeout task up immediately so that it can be closed.
|
||||
*/
|
||||
int remaining_window = tick_remain(now_ms, global.close_spread_end);
|
||||
if (remaining_window) {
|
||||
/* We don't need to reset the expire if it would
|
||||
* already happen before the close window end.
|
||||
*/
|
||||
if (tick_is_le(global.close_spread_end, h1c->task->expire)) {
|
||||
/* Set an expire value shorter than the current value
|
||||
* because the close spread window end comes earlier.
|
||||
*/
|
||||
h1c->task->expire = tick_add(now_ms, statistical_prng_range(remaining_window));
|
||||
TRACE_DEVEL("connection timeout set to value before close-spread window end", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* We are past the soft close window end, wake the timeout
|
||||
* task up immediately.
|
||||
*/
|
||||
task_wakeup(h1c->task, TASK_WOKEN_TIMER);
|
||||
}
|
||||
}
|
||||
TRACE_DEVEL("new expiration date", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn, 0, 0, (size_t[]){h1c->task->expire});
|
||||
task_queue(h1c->task);
|
||||
}
|
||||
@ -3011,8 +3048,25 @@ static int h1_process(struct h1c * h1c)
|
||||
if (!(h1c->flags & H1C_F_IS_BACK)) {
|
||||
if (unlikely(h1c->px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
|
||||
if (!(h1c->px->options & PR_O_IDLE_CLOSE_RESP) &&
|
||||
h1c->flags & H1C_F_WAIT_NEXT_REQ)
|
||||
goto release;
|
||||
h1c->flags & H1C_F_WAIT_NEXT_REQ) {
|
||||
|
||||
int send_close = 1;
|
||||
/* If a close-spread-time option is set, we want to avoid
|
||||
* closing all the active HTTP2 connections at once so we add a
|
||||
* random factor that will spread the closing.
|
||||
*/
|
||||
if (tick_isset(global.close_spread_end)) {
|
||||
int remaining_window = tick_remain(now_ms, global.close_spread_end);
|
||||
if (remaining_window) {
|
||||
/* This should increase the closing rate the
|
||||
* further along the window we are.
|
||||
*/
|
||||
send_close = (remaining_window <= statistical_prng_range(global.close_spread_time));
|
||||
}
|
||||
}
|
||||
if (send_close)
|
||||
goto release;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
60
src/mux_h2.c
60
src/mux_h2.c
@ -706,6 +706,8 @@ static inline int h2c_may_expire(const struct h2c *h2c)
|
||||
/* update h2c timeout if needed */
|
||||
static void h2c_update_timeout(struct h2c *h2c)
|
||||
{
|
||||
int is_idle_conn = 0;
|
||||
|
||||
TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
|
||||
|
||||
if (!h2c->task)
|
||||
@ -716,12 +718,14 @@ static void h2c_update_timeout(struct h2c *h2c)
|
||||
if (h2c->last_sid >= 0) {
|
||||
/* GOAWAY sent, closing in progress */
|
||||
h2c->task->expire = tick_add_ifset(now_ms, h2c->shut_timeout);
|
||||
is_idle_conn = 1;
|
||||
} else if (br_data(h2c->mbuf)) {
|
||||
/* pending output data: always the regular data timeout */
|
||||
h2c->task->expire = tick_add_ifset(now_ms, h2c->timeout);
|
||||
} else if (h2c->max_id > 0 && !b_data(&h2c->dbuf)) {
|
||||
/* idle after having seen one stream => keep-alive */
|
||||
h2c->task->expire = tick_add_ifset(h2c->idle_start, h2c->proxy->timeout.httpka);
|
||||
is_idle_conn = 1;
|
||||
} else {
|
||||
/* before first request, or started to deserialize a
|
||||
* new req => http-request, but only set, not refresh.
|
||||
@ -732,6 +736,37 @@ static void h2c_update_timeout(struct h2c *h2c)
|
||||
/* if a timeout above was not set, fall back to the default one */
|
||||
if (!tick_isset(h2c->task->expire))
|
||||
h2c->task->expire = tick_add_ifset(now_ms, h2c->timeout);
|
||||
|
||||
if ((h2c->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) &&
|
||||
is_idle_conn && tick_isset(global.close_spread_end)) {
|
||||
/* If a soft-stop is in progress and a close-spread-time
|
||||
* is set, we want to spread idle connection closing roughly
|
||||
* evenly across the defined window. This should only
|
||||
* act on idle frontend connections.
|
||||
* If the window end is already in the past, we wake the
|
||||
* timeout task up immediately so that it can be closed.
|
||||
*/
|
||||
int remaining_window = tick_remain(now_ms, global.close_spread_end);
|
||||
if (remaining_window) {
|
||||
/* We don't need to reset the expire if it would
|
||||
* already happen before the close window end.
|
||||
*/
|
||||
if (tick_isset(h2c->task->expire) &&
|
||||
tick_is_le(global.close_spread_end, h2c->task->expire)) {
|
||||
/* Set an expire value shorter than the current value
|
||||
* because the close spread window end comes earlier.
|
||||
*/
|
||||
h2c->task->expire = tick_add(now_ms, statistical_prng_range(remaining_window));
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* We are past the soft close window end, wake the timeout
|
||||
* task up immediately.
|
||||
*/
|
||||
task_wakeup(h2c->task, TASK_WOKEN_TIMER);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
h2c->task->expire = TICK_ETERNITY;
|
||||
}
|
||||
@ -4000,15 +4035,30 @@ static int h2_process(struct h2c *h2c)
|
||||
h2_send(h2c);
|
||||
|
||||
if (unlikely(h2c->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && !(h2c->flags & H2_CF_IS_BACK)) {
|
||||
int send_goaway = 1;
|
||||
/* If a close-spread-time option is set, we want to avoid
|
||||
* closing all the active HTTP2 connections at once so we add a
|
||||
* random factor that will spread the closing.
|
||||
*/
|
||||
if (tick_isset(global.close_spread_end)) {
|
||||
int remaining_window = tick_remain(now_ms, global.close_spread_end);
|
||||
if (remaining_window) {
|
||||
/* This should increase the closing rate the
|
||||
* further along the window we are. */
|
||||
send_goaway = (remaining_window <= statistical_prng_range(global.close_spread_time));
|
||||
}
|
||||
}
|
||||
/* frontend is stopping, reload likely in progress, let's try
|
||||
* to announce a graceful shutdown if not yet done. We don't
|
||||
* care if it fails, it will be tried again later.
|
||||
*/
|
||||
TRACE_STATE("proxy stopped, sending GOAWAY", H2_EV_H2C_WAKE|H2_EV_TX_FRAME, conn);
|
||||
if (!(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
|
||||
if (h2c->last_sid < 0)
|
||||
h2c->last_sid = (1U << 31) - 1;
|
||||
h2c_send_goaway_error(h2c, NULL);
|
||||
if (send_goaway) {
|
||||
TRACE_STATE("proxy stopped, sending GOAWAY", H2_EV_H2C_WAKE|H2_EV_TX_FRAME, conn);
|
||||
if (!(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
|
||||
if (h2c->last_sid < 0)
|
||||
h2c->last_sid = (1U << 31) - 1;
|
||||
h2c_send_goaway_error(h2c, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
33
src/proxy.c
33
src/proxy.c
@ -2046,6 +2046,34 @@ static int proxy_parse_hard_stop_after(char **args, int section_type, struct pro
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int proxy_parse_close_spread_time(char **args, int section_type, struct proxy *curpx,
|
||||
const struct proxy *defpx, const char *file, int line,
|
||||
char **err)
|
||||
{
|
||||
const char *res;
|
||||
|
||||
if (!*args[1]) {
|
||||
memprintf(err, "'%s' expects <time> as argument.\n", args[0]);
|
||||
return -1;
|
||||
}
|
||||
res = parse_time_err(args[1], &global.close_spread_time, TIME_UNIT_MS);
|
||||
if (res == PARSE_TIME_OVER) {
|
||||
memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
|
||||
args[1], args[0]);
|
||||
return -1;
|
||||
}
|
||||
else if (res == PARSE_TIME_UNDER) {
|
||||
memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
|
||||
args[1], args[0]);
|
||||
return -1;
|
||||
}
|
||||
else if (res) {
|
||||
memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct task *hard_stop(struct task *t, void *context, unsigned int state)
|
||||
{
|
||||
struct proxy *p;
|
||||
@ -2099,6 +2127,10 @@ static void do_soft_stop_now()
|
||||
/* disable busy polling to avoid cpu eating for the new process */
|
||||
global.tune.options &= ~GTUNE_BUSY_POLLING;
|
||||
|
||||
if (tick_isset(global.close_spread_time)) {
|
||||
global.close_spread_end = tick_add(now_ms, global.close_spread_time);
|
||||
}
|
||||
|
||||
/* schedule a hard-stop after a delay if needed */
|
||||
if (tick_isset(global.hard_stop_after)) {
|
||||
task = task_new_anywhere();
|
||||
@ -2501,6 +2533,7 @@ void proxy_adjust_all_maxconn()
|
||||
static struct cfg_kw_list cfg_kws = {ILH, {
|
||||
{ CFG_GLOBAL, "grace", proxy_parse_grace },
|
||||
{ CFG_GLOBAL, "hard-stop-after", proxy_parse_hard_stop_after },
|
||||
{ CFG_GLOBAL, "close-spread-time", proxy_parse_close_spread_time },
|
||||
{ CFG_LISTEN, "timeout", proxy_parse_timeout },
|
||||
{ CFG_LISTEN, "clitimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
|
||||
{ CFG_LISTEN, "contimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
|
||||
|
Loading…
Reference in New Issue
Block a user