mirror of
https://github.com/ceph/ceph
synced 2025-01-11 21:50:26 +00:00
osd: don't finish boot unless instance in map is really us
We were going BOOTING->ACTIVE as soon as we showed up in the map with the same client_addr. Also verify that our up_from epoch is later than the epoch at which we started or last rebound (bind_epoch), to avoid the case where we rebind to the same ports for client_addr (but maybe not for the other addresses) and get caught in a rebind loop. Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
This commit is contained in:
parent
ad38abcffb
commit
4aa90a5e68
@ -525,7 +525,7 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger,
|
||||
dev_path(dev), journal_path(jdev),
|
||||
dispatch_running(false),
|
||||
osd_compat(get_osd_compat_set()),
|
||||
state(STATE_BOOTING), boot_epoch(0), up_epoch(0),
|
||||
state(STATE_BOOTING), boot_epoch(0), up_epoch(0), bind_epoch(0),
|
||||
op_tp(external_messenger->cct, "OSD::op_tp", g_conf->osd_op_threads),
|
||||
recovery_tp(external_messenger->cct, "OSD::recovery_tp", g_conf->osd_recovery_threads),
|
||||
disk_tp(external_messenger->cct, "OSD::disk_tp", g_conf->osd_disk_threads),
|
||||
@ -663,6 +663,8 @@ int OSD::init()
|
||||
}
|
||||
osdmap = get_map(superblock.current_epoch);
|
||||
|
||||
bind_epoch = osdmap->get_epoch();
|
||||
|
||||
clear_temp();
|
||||
|
||||
// make sure (newish) temp dir exists
|
||||
@ -3174,7 +3176,8 @@ void OSD::handle_osd_map(MOSDMap *m)
|
||||
|
||||
C_Contexts *fin = new C_Contexts(g_ceph_context);
|
||||
if (osdmap->is_up(whoami) &&
|
||||
osdmap->get_addr(whoami) == client_messenger->get_myaddr()) {
|
||||
osdmap->get_addr(whoami) == client_messenger->get_myaddr() &&
|
||||
bind_epoch < osdmap->get_up_from(whoami)) {
|
||||
|
||||
if (is_booting()) {
|
||||
dout(1) << "state: booting -> active" << dendl;
|
||||
@ -3205,16 +3208,17 @@ void OSD::handle_osd_map(MOSDMap *m)
|
||||
<< " != my " << client_messenger->get_myaddr();
|
||||
else if (!osdmap->get_cluster_addr(whoami).probably_equals(cluster_messenger->get_myaddr()))
|
||||
clog.error() << "map e" << osdmap->get_epoch()
|
||||
<< " had wrong client addr (" << osdmap->get_cluster_addr(whoami)
|
||||
<< " had wrong cluster addr (" << osdmap->get_cluster_addr(whoami)
|
||||
<< " != my " << cluster_messenger->get_myaddr();
|
||||
else if (!osdmap->get_hb_addr(whoami).probably_equals(hbout_messenger->get_myaddr()))
|
||||
clog.error() << "map e" << osdmap->get_epoch()
|
||||
<< " had wrong client addr (" << osdmap->get_hb_addr(whoami)
|
||||
<< " had wrong hb addr (" << osdmap->get_hb_addr(whoami)
|
||||
<< " != my " << hbout_messenger->get_myaddr();
|
||||
|
||||
state = STATE_BOOTING;
|
||||
up_epoch = 0;
|
||||
do_restart = true;
|
||||
bind_epoch = osdmap->get_epoch();
|
||||
|
||||
int cport = cluster_messenger->get_myaddr().get_port();
|
||||
int hbport = hbout_messenger->get_myaddr().get_port();
|
||||
@ -3547,6 +3551,7 @@ void OSD::add_map_bl(epoch_t e, bufferlist& bl)
|
||||
dout(10) << "add_map_bl " << e << " " << bl.length() << " bytes" << dendl;
|
||||
map_bl[e] = bl;
|
||||
}
|
||||
|
||||
void OSD::add_map_inc_bl(epoch_t e, bufferlist& bl)
|
||||
{
|
||||
Mutex::Locker l(map_cache_lock);
|
||||
|
@ -206,6 +206,7 @@ private:
|
||||
int state;
|
||||
epoch_t boot_epoch; // _first_ epoch we were marked up (after this process started)
|
||||
epoch_t up_epoch; // _most_recent_ epoch we were marked up
|
||||
epoch_t bind_epoch; // epoch we last did a bind to new ip:ports
|
||||
|
||||
public:
|
||||
bool is_booting() { return state == STATE_BOOTING; }
|
||||
|
Loading…
Reference in New Issue
Block a user