mirror of
https://github.com/ceph/ceph
synced 2025-01-03 17:42:36 +00:00
osd: Adjust failure reporting.
When a failure report is sent to the mon, the failed OSD is added to failure_pending. If the OSD later receives a heartbeat from an OSD in failure_pending, it sends an MOSDFailure message retracting the previous failure report. If an OSD has been queued as failed but its report has not been sent yet, it is simply removed from failure_queue.
This commit is contained in:
parent
83e58257a9
commit
d979e48d93
@ -1334,6 +1334,12 @@ void OSD::handle_osd_ping(MOSDPing *m)
|
||||
}
|
||||
|
||||
heartbeat_from_stamp[from] = g_clock.now(); // don't let _my_ lag interfere.
|
||||
// remove from failure lists if needed
|
||||
if (failure_pending.count(from)) {
|
||||
send_still_alive(from);
|
||||
failure_pending.erase(from);
|
||||
}
|
||||
failure_queue.erase(from);
|
||||
} else {
|
||||
dout(10) << " ignoring " << m->get_source_inst() << dendl;
|
||||
}
|
||||
@ -1376,6 +1382,7 @@ void OSD::heartbeat_check()
|
||||
<< " since " << heartbeat_from_stamp[p->first]
|
||||
<< " (cutoff " << grace << ")" << dendl;
|
||||
queue_failure(p->first);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1620,9 +1627,18 @@ void OSD::send_failures()
|
||||
int osd = *failure_queue.begin();
|
||||
monc->send_mon_message(new MOSDFailure(monc->get_fsid(), osdmap->get_inst(osd), osdmap->get_epoch()));
|
||||
failure_queue.erase(osd);
|
||||
failure_pending.insert(osd);
|
||||
}
|
||||
}
|
||||
|
||||
void OSD::send_still_alive(int osd)
|
||||
{
|
||||
MOSDFailure *m = new MOSDFailure(monc->get_fsid(), osdmap->get_inst(osd),
|
||||
osdmap->get_epoch());
|
||||
m->is_failed = false;
|
||||
monc->send_mon_message(m);
|
||||
}
|
||||
|
||||
void OSD::send_pg_stats()
|
||||
{
|
||||
assert(osd_lock.is_locked());
|
||||
|
@ -546,10 +546,12 @@ protected:
|
||||
set<int> failure_queue;
|
||||
set<int> failure_pending;
|
||||
|
||||
|
||||
// Record osd `n` in failure_queue; send_failures() later reports the
// queued entries to the monitor.
void queue_failure(int n)
{
  failure_queue.insert(n);
}
|
||||
void send_failures();
|
||||
void send_still_alive(int osd);
|
||||
|
||||
// -- pg stats --
|
||||
Mutex pg_stat_queue_lock;
|
||||
|
Loading…
Reference in New Issue
Block a user