osd: mark down connections from old peers

Close out any connection with an old peer.  This avoids a race like:

- peer is marked down
- we get the map and mark down the con
- they reconnect and try to send us some stuff
- we share our map to tell them they are old and dead, but leave the con
  open
...
- peer marks itself up a few times, eventually reuses the same port
- peer sends messages on its fresh con
- we discard those messages because our old con to that address is still
  open

This could cause a tight reconnect loop, but it is better than wrong
behavior.

Other possible fixes:
 - make addr nonce truly unique (augment pid in nonce)
 - make a smarter 'disposable' msgr state (bleh)

Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
Sage Weil 2013-03-08 08:56:44 -08:00 committed by Samuel Just
parent ba7e815a18
commit 881e9d850c

View File

@@ -4521,6 +4521,8 @@ bool OSD::require_same_or_newer_map(OpRequestRef op, epoch_t epoch)
Message *m = op->request;
dout(15) << "require_same_or_newer_map " << epoch << " (i am " << osdmap->get_epoch() << ") " << m << dendl;
assert(osd_lock.is_locked());
// do they have a newer map?
if (epoch > osdmap->get_epoch()) {
dout(7) << "waiting for newer map epoch " << epoch << " > my " << osdmap->get_epoch() << " with " << m << dendl;
@@ -4538,12 +4540,8 @@ bool OSD::require_same_or_newer_map(OpRequestRef op, epoch_t epoch)
int from = m->get_source().num();
if (!osdmap->have_inst(from) ||
osdmap->get_cluster_addr(from) != m->get_source_inst().addr) {
if (m->get_connection()->has_feature(CEPH_FEATURE_OSD_HBMSGS)) {
dout(10) << "from dead osd." << from << ", dropping, sharing map" << dendl;
send_incremental_map(epoch, m->get_connection());
} else {
dout(10) << "from dead osd." << from << ", but it lacks OSD_HBMSGS feature, not sharing map" << dendl;
}
dout(10) << "from dead osd." << from << ", marking down" << dendl;
cluster_messenger->mark_down(m->get_connection());
return false;
}
}