msgr: simple exponential backoff, with tunable initial and max delay

This commit is contained in:
Sage Weil 2009-10-12 23:04:24 -07:00
parent 329f5c6490
commit 5d33a74fba
4 changed files with 15 additions and 23 deletions

View File

@@ -341,8 +341,8 @@ static struct config_option config_optionsp[] = {
OPTION(clock_lock, 0, OPT_BOOL, false),
OPTION(clock_tare, 0, OPT_BOOL, false),
OPTION(ms_tcp_nodelay, 0, OPT_BOOL, true),
OPTION(ms_retry_interval, 0, OPT_DOUBLE, 2.0), // how often to attempt reconnect
OPTION(ms_fail_interval, 0, OPT_DOUBLE, 15.0), // fail after this long
OPTION(ms_initial_backoff, 0, OPT_DOUBLE, .2),
OPTION(ms_max_backoff, 0, OPT_DOUBLE, 15.0),
OPTION(ms_die_on_failure, 0, OPT_BOOL, false),
OPTION(ms_nocrc, 0, OPT_BOOL, false),
OPTION(ms_die_on_bad_msg, 0, OPT_BOOL, false),

View File

@@ -117,8 +117,8 @@ struct md_config_t {
*/
bool ms_tcp_nodelay;
double ms_retry_interval;
double ms_fail_interval;
double ms_initial_backoff;
double ms_max_backoff;
bool ms_die_on_failure;
bool ms_nocrc;
bool ms_die_on_bad_msg;

View File

@@ -1000,7 +1000,7 @@ int SimpleMessenger::Pipe::connect()
state = STATE_OPEN;
connect_seq = cseq + 1;
assert(connect_seq == reply.connect_seq);
first_fault = last_attempt = utime_t();
backoff = utime_t();
dout(20) << "connect success " << connect_seq << ", lossy = " << policy.lossy << dendl;
if (!reader_running) {
@@ -1124,33 +1124,26 @@ void SimpleMessenger::Pipe::fault(bool onconnect, bool onread)
return;
}
utime_t now = g_clock.now();
if (state != STATE_CONNECTING) {
if (!onconnect)
dout(0) << "fault initiating reconnect" << dendl;
connect_seq++;
state = STATE_CONNECTING;
first_fault = now;
} else if (first_fault.sec() == 0) {
backoff = utime_t();
} else if (backoff == utime_t()) {
if (!onconnect)
dout(0) << "fault first fault" << dendl;
first_fault = now;
backoff.set_from_double(g_conf.ms_initial_backoff);
} else {
#warning clean me up
utime_t failinterval = now - first_fault;
utime_t retryinterval = now - last_attempt;
if (!onconnect) dout(10) << "fault failure was " << failinterval
<< " ago, last attempt was at " << last_attempt
<< ", " << retryinterval << " ago" << dendl;
// wait
now += 1.0;
dout(10) << "fault waiting until " << now << dendl;
cond.WaitUntil(lock, now);
dout(10) << "fault waiting " << backoff << dendl;
cond.WaitInterval(lock, backoff);
backoff += backoff;
if (backoff > g_conf.ms_max_backoff)
backoff.set_from_double(g_conf.ms_max_backoff);
dout(10) << "fault done waiting or woke up" << dendl;
}
last_attempt = now;
}
void SimpleMessenger::Pipe::fail()

View File

@@ -103,8 +103,7 @@ private:
protected:
Connection *connection_state;
utime_t first_fault; // time of original failure
utime_t last_attempt; // time of last reconnect attempt
utime_t backoff; // backoff time
bool reader_running;
bool writer_running;