mirror of
https://github.com/ceph/ceph
synced 2025-02-24 03:27:10 +00:00
osd: do not clobber explicitly requested heartbeat_to target addresss
Consider peer P. - P does down in, say, epoch 60, and back up in epoch 70 - P and requests a heartbeat, as_of 70 - We update to map 50, and coincidentally add the same peer as a target - We set the heartbeat_to[P] = 50 and start sending to the _old_ address - P marks us down because we stop sending to the new addr - We eventually get map 70, but it's too late! Make sure we preserve any _to targets _and_ their epoch+inst. Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
This commit is contained in:
parent
e1830dbd09
commit
7a574d88ae
@ -1415,6 +1415,18 @@ void OSD::update_heartbeat_peers()
|
||||
|
||||
heartbeat_epoch = osdmap->get_epoch();
|
||||
|
||||
// grandfather newer _to peers
|
||||
for (map<int,epoch_t>::iterator p = old_to.begin();
|
||||
p != old_to.end();
|
||||
p++) {
|
||||
if (p->second > osdmap->get_epoch()) {
|
||||
dout(10) << "update_heartbeat_peers: keeping newer _to peer " << old_inst[p->first]
|
||||
<< " as of " << p->second << dendl;
|
||||
heartbeat_to[p->first] = p->second;
|
||||
heartbeat_inst[p->first] = old_inst[p->first];
|
||||
}
|
||||
}
|
||||
|
||||
// build heartbeat to/from set
|
||||
for (hash_map<pg_t, PG*>::iterator i = pg_map.begin();
|
||||
i != pg_map.end();
|
||||
@ -1461,30 +1473,24 @@ void OSD::update_heartbeat_peers()
|
||||
assert(old_inst.count(p->first));
|
||||
if (heartbeat_to.count(p->first))
|
||||
continue;
|
||||
if (p->second > osdmap->get_epoch()) {
|
||||
dout(10) << "update_heartbeat_peers: keeping newer _to peer " << old_inst[p->first]
|
||||
<< " as of " << p->second << dendl;
|
||||
heartbeat_to[p->first] = p->second;
|
||||
heartbeat_inst[p->first] = old_inst[p->first];
|
||||
assert(p->second <= osdmap->get_epoch());
|
||||
if (heartbeat_from.count(p->first) && old_inst[p->first] == heartbeat_inst[p->first]) {
|
||||
dout(10) << "update_heartbeat_peers: old _to peer " << old_inst[p->first]
|
||||
<< " is still a _from peer, not marking down" << dendl;
|
||||
} else {
|
||||
if (heartbeat_from.count(p->first) && old_inst[p->first] == heartbeat_inst[p->first]) {
|
||||
dout(10) << "update_heartbeat_peers: old _to peer " << old_inst[p->first]
|
||||
<< " is still a _from peer, not marking down" << dendl;
|
||||
} else {
|
||||
dout(10) << "update_heartbeat_peers: marking down old _to peer " << old_inst[p->first]
|
||||
<< " as of " << p->second << dendl;
|
||||
heartbeat_messenger->mark_down(old_inst[p->first].addr);
|
||||
}
|
||||
dout(10) << "update_heartbeat_peers: marking down old _to peer " << old_inst[p->first]
|
||||
<< " as of " << p->second << dendl;
|
||||
heartbeat_messenger->mark_down(old_inst[p->first].addr);
|
||||
}
|
||||
|
||||
if (!osdmap->is_down(p->first) &&
|
||||
osdmap->get_hb_inst(p->first) == old_inst[p->first]) {
|
||||
dout(10) << "update_heartbeat_peers: sharing map with old _to peer " << old_inst[p->first]
|
||||
<< " as of " << p->second << dendl;
|
||||
// share latest map with this peer (via the cluster link, NOT
|
||||
// the heartbeat link), so they know not to expect heartbeats
|
||||
// from us. otherwise they may mark us down!
|
||||
_share_map_outgoing(osdmap->get_cluster_inst(p->first));
|
||||
}
|
||||
if (!osdmap->is_down(p->first) &&
|
||||
osdmap->get_hb_inst(p->first) == old_inst[p->first]) {
|
||||
dout(10) << "update_heartbeat_peers: sharing map with old _to peer " << old_inst[p->first]
|
||||
<< " as of " << p->second << dendl;
|
||||
// share latest map with this peer (via the cluster link, NOT
|
||||
// the heartbeat link), so they know not to expect heartbeats
|
||||
// from us. otherwise they may mark us down!
|
||||
_share_map_outgoing(osdmap->get_cluster_inst(p->first));
|
||||
}
|
||||
}
|
||||
for (map<int,epoch_t>::iterator p = old_from.begin();
|
||||
|
Loading…
Reference in New Issue
Block a user