BUG/MINOR: checks: restore legacy on-error fastinter behavior

With the previous commit, 9e080bf ("BUG/MINOR: checks: make sure fastinter is
used even on forced transitions"), on-error mark-down|sudden-death|fail-check
are now working as expected.

However, on-error fastinter remains broken because srv_getinter(), used in
the above commit to check the expiration date, won't return the fastinter
interval if the server health is maxed out (which is the case with on-error
fastinter mode).

To fix this, we introduce a check flag named CHK_ST_FASTINTER.
This flag is set when on-error is triggered. This way, we can force
srv_getinter() to return the fastinter interval whenever the flag is set.
The flag is automatically cleared as soon as the new check task expiry is
recalculated in process_chk_conn().
This restores the original behavior from before d114f4a ("MEDIUM: checks:
spread the checks load over random threads").

It must be backported to 2.7 along with the aforementioned commits.
This commit is contained in:
Aurelien DARRAGON 2022-12-07 12:17:24 +01:00 committed by Willy Tarreau
parent e57b702e2b
commit 7d541a91ec
3 changed files with 12 additions and 1 deletions

View File

@ -57,6 +57,7 @@ enum chk_result {
#define CHK_ST_CLOSE_CONN 0x0100 /* check is waiting that the connection gets closed */ #define CHK_ST_CLOSE_CONN 0x0100 /* check is waiting that the connection gets closed */
#define CHK_ST_PURGE 0x0200 /* check must be freed */ #define CHK_ST_PURGE 0x0200 /* check must be freed */
#define CHK_ST_SLEEPING 0x0400 /* check was sleeping */ #define CHK_ST_SLEEPING 0x0400 /* check was sleeping */
#define CHK_ST_FASTINTER 0x0800 /* force fastinter check */
/* check status */ /* check status */
enum healthcheck_status { enum healthcheck_status {

View File

@ -670,6 +670,11 @@ void __health_adjust(struct server *s, short status)
HA_SPIN_LOCK(SERVER_LOCK, &s->lock); HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
/* force fastinter for upcoming check
* (does nothing if fastinter is not enabled)
*/
s->check.state |= CHK_ST_FASTINTER;
switch (s->onerror) { switch (s->onerror) {
case HANA_ONERR_FASTINTER: case HANA_ONERR_FASTINTER:
/* force fastinter - nothing to do here as all modes force it */ /* force fastinter - nothing to do here as all modes force it */
@ -1285,6 +1290,10 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state)
rv -= (int) (2 * rv * (statistical_prng() / 4294967295.0)); rv -= (int) (2 * rv * (statistical_prng() / 4294967295.0));
} }
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv)); t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
/* reset fastinter flag (if set) so that srv_getinter()
* only returns fastinter if server health is degraded
*/
check->state &= ~CHK_ST_FASTINTER;
} }
reschedule: reschedule:

View File

@ -124,7 +124,8 @@ int srv_getinter(const struct check *check)
{ {
const struct server *s = check->server; const struct server *s = check->server;
if ((check->state & CHK_ST_CONFIGURED) && (check->health == check->rise + check->fall - 1)) if ((check->state & (CHK_ST_CONFIGURED|CHK_ST_FASTINTER)) == CHK_ST_CONFIGURED &&
(check->health == check->rise + check->fall - 1))
return check->inter; return check->inter;
if ((s->next_state == SRV_ST_STOPPED) && check->health == 0) if ((s->next_state == SRV_ST_STOPPED) && check->health == 0)