mirror of
https://github.com/ceph/ceph
synced 2025-03-25 11:48:05 +00:00
msgr: simple exponential backoff, with tunable initial and max delay
This commit is contained in:
parent
329f5c6490
commit
5d33a74fba
@@ -341,8 +341,8 @@ static struct config_option config_optionsp[] = {
|
||||
OPTION(clock_lock, 0, OPT_BOOL, false),
|
||||
OPTION(clock_tare, 0, OPT_BOOL, false),
|
||||
OPTION(ms_tcp_nodelay, 0, OPT_BOOL, true),
|
||||
OPTION(ms_retry_interval, 0, OPT_DOUBLE, 2.0), // how often to attempt reconnect
|
||||
OPTION(ms_fail_interval, 0, OPT_DOUBLE, 15.0), // fail after this long
|
||||
OPTION(ms_initial_backoff, 0, OPT_DOUBLE, .2),
|
||||
OPTION(ms_max_backoff, 0, OPT_DOUBLE, 15.0),
|
||||
OPTION(ms_die_on_failure, 0, OPT_BOOL, false),
|
||||
OPTION(ms_nocrc, 0, OPT_BOOL, false),
|
||||
OPTION(ms_die_on_bad_msg, 0, OPT_BOOL, false),
|
||||
|
@@ -117,8 +117,8 @@ struct md_config_t {
|
||||
*/
|
||||
|
||||
bool ms_tcp_nodelay;
|
||||
double ms_retry_interval;
|
||||
double ms_fail_interval;
|
||||
double ms_initial_backoff;
|
||||
double ms_max_backoff;
|
||||
bool ms_die_on_failure;
|
||||
bool ms_nocrc;
|
||||
bool ms_die_on_bad_msg;
|
||||
|
@@ -1000,7 +1000,7 @@ int SimpleMessenger::Pipe::connect()
|
||||
state = STATE_OPEN;
|
||||
connect_seq = cseq + 1;
|
||||
assert(connect_seq == reply.connect_seq);
|
||||
first_fault = last_attempt = utime_t();
|
||||
backoff = utime_t();
|
||||
dout(20) << "connect success " << connect_seq << ", lossy = " << policy.lossy << dendl;
|
||||
|
||||
if (!reader_running) {
|
||||
@@ -1124,33 +1124,26 @@ void SimpleMessenger::Pipe::fault(bool onconnect, bool onread)
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
utime_t now = g_clock.now();
|
||||
if (state != STATE_CONNECTING) {
|
||||
if (!onconnect)
|
||||
dout(0) << "fault initiating reconnect" << dendl;
|
||||
connect_seq++;
|
||||
state = STATE_CONNECTING;
|
||||
first_fault = now;
|
||||
} else if (first_fault.sec() == 0) {
|
||||
backoff = utime_t();
|
||||
} else if (backoff == utime_t()) {
|
||||
if (!onconnect)
|
||||
dout(0) << "fault first fault" << dendl;
|
||||
first_fault = now;
|
||||
backoff.set_from_double(g_conf.ms_initial_backoff);
|
||||
} else {
|
||||
|
||||
#warning clean me up
|
||||
|
||||
utime_t failinterval = now - first_fault;
|
||||
utime_t retryinterval = now - last_attempt;
|
||||
if (!onconnect) dout(10) << "fault failure was " << failinterval
|
||||
<< " ago, last attempt was at " << last_attempt
|
||||
<< ", " << retryinterval << " ago" << dendl;
|
||||
// wait
|
||||
now += 1.0;
|
||||
dout(10) << "fault waiting until " << now << dendl;
|
||||
cond.WaitUntil(lock, now);
|
||||
dout(10) << "fault waiting " << backoff << dendl;
|
||||
cond.WaitInterval(lock, backoff);
|
||||
backoff += backoff;
|
||||
if (backoff > g_conf.ms_max_backoff)
|
||||
backoff.set_from_double(g_conf.ms_max_backoff);
|
||||
dout(10) << "fault done waiting or woke up" << dendl;
|
||||
}
|
||||
last_attempt = now;
|
||||
}
|
||||
|
||||
void SimpleMessenger::Pipe::fail()
|
||||
|
@@ -103,8 +103,7 @@ private:
|
||||
protected:
|
||||
Connection *connection_state;
|
||||
|
||||
utime_t first_fault; // time of original failure
|
||||
utime_t last_attempt; // time of last reconnect attempt
|
||||
utime_t backoff; // backoff time
|
||||
|
||||
bool reader_running;
|
||||
bool writer_running;
|
||||
|
Loading…
Reference in New Issue
Block a user