mirror of
https://github.com/ceph/ceph
synced 2025-02-25 03:52:04 +00:00
mon: clear connection score during update & add sanity check live/dead connection report
When upgrading the monitors (including when they boot up), we check whether `peer_tracker` is dirty and, if so, clear it. Added functions to the `Elector` and `ConnectionTracker` classes to check for a clean `peer_tracker`. Moreover, because of startup weirdness or other abnormal circumstances, we might get a report from our own rank, so a sanity check was added to `ConnectionTracker::report_live_connection` and `ConnectionTracker::report_dead_connection` to drop such reports. Fixes: https://tracker.ceph.com/issues/58049 Signed-off-by: Kamoltat <ksirivad@redhat.com>
This commit is contained in:
parent
8efc19911b
commit
767a4be12d
@ -106,6 +106,10 @@ void ConnectionTracker::report_live_connection(int peer_rank, double units_alive
|
||||
{
|
||||
ldout(cct, 30) << __func__ << " peer_rank: " << peer_rank << " units_alive: " << units_alive << dendl;
|
||||
ldout(cct, 30) << "my_reports before: " << my_reports << dendl;
|
||||
if (peer_rank == rank) {
|
||||
lderr(cct) << "Got a report from my own rank, hopefully this is startup weirdness, dropping" << dendl;
|
||||
return;
|
||||
}
|
||||
// we need to "auto-initialize" to 1, do shenanigans
|
||||
auto i = my_reports.history.find(peer_rank);
|
||||
if (i == my_reports.history.end()) {
|
||||
@ -130,6 +134,10 @@ void ConnectionTracker::report_dead_connection(int peer_rank, double units_dead)
|
||||
{
|
||||
ldout(cct, 30) << __func__ << " peer_rank: " << peer_rank << " units_dead: " << units_dead << dendl;
|
||||
ldout(cct, 30) << "my_reports before: " << my_reports << dendl;
|
||||
if (peer_rank == rank) {
|
||||
lderr(cct) << "Got a report from my own rank, hopefully this is startup weirdness, dropping" << dendl;
|
||||
return;
|
||||
}
|
||||
// we need to "auto-initialize" to 1, do shenanigans
|
||||
auto i = my_reports.history.find(peer_rank);
|
||||
if (i == my_reports.history.end()) {
|
||||
@ -246,6 +254,22 @@ void ConnectionTracker::notify_rank_removed(int rank_removed, int new_rank)
|
||||
increase_version();
|
||||
}
|
||||
|
||||
bool ConnectionTracker::is_clean(int mon_rank, int monmap_size)
|
||||
{
|
||||
ldout(cct, 30) << __func__ << dendl;
|
||||
// check consistency between our rank according
|
||||
// to monmap and our rank according to our report.
|
||||
if (rank != mon_rank ||
|
||||
my_reports.rank != mon_rank) {
|
||||
return false;
|
||||
} else if (!peer_reports.empty()){
|
||||
// if peer_report max rank is greater than monmap max rank
|
||||
// then there is a problem.
|
||||
if (peer_reports.rbegin()->first > monmap_size - 1) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ConnectionTracker::encode(bufferlist &bl) const
|
||||
{
|
||||
ENCODE_START(1, 1, bl);
|
||||
|
@ -120,6 +120,13 @@ class ConnectionTracker {
|
||||
*/
|
||||
void get_total_connection_score(int peer_rank, double *rating,
|
||||
int *live_count) const;
|
||||
/**
|
||||
* Check if our ranks are clean and make
|
||||
* sure there are no extra peer_reports lingering.
|
||||
* In the future we also want to check the reports
|
||||
* current and history of each peer_report.
|
||||
*/
|
||||
bool is_clean(int mon_rank, int monmap_size);
|
||||
/**
|
||||
* Encode this ConnectionTracker. Useful both for storing on disk
|
||||
* and for sending off to peers for decoding and import
|
||||
@ -185,6 +192,7 @@ class ConnectionTracker {
|
||||
rank = new_rank;
|
||||
my_reports.rank = rank;
|
||||
}
|
||||
|
||||
void notify_rank_changed(int new_rank);
|
||||
void notify_rank_removed(int rank_removed, int new_rank);
|
||||
friend std::ostream& operator<<(std::ostream& o, const ConnectionTracker& c);
|
||||
|
@ -716,6 +716,11 @@ void Elector::start_participating()
|
||||
logic.participating = true;
|
||||
}
|
||||
|
||||
bool Elector::peer_tracker_is_clean()
|
||||
{
|
||||
return peer_tracker.is_clean(mon->rank, paxos_size());
|
||||
}
|
||||
|
||||
void Elector::notify_clear_peer_state()
|
||||
{
|
||||
dout(10) << __func__ << dendl;
|
||||
|
@ -357,6 +357,11 @@ class Elector : public ElectionOwner, RankProvider {
|
||||
* @post @p participating is true
|
||||
*/
|
||||
void start_participating();
|
||||
/**
|
||||
* Check if our peer_tracker is self-consistent, not suffering from
|
||||
* https://tracker.ceph.com/issues/58049
|
||||
*/
|
||||
bool peer_tracker_is_clean();
|
||||
/**
|
||||
* Forget everything about our peers. :(
|
||||
*/
|
||||
|
@ -943,7 +943,15 @@ int Monitor::init()
|
||||
osdmon()->get_filestore_osd_list();
|
||||
|
||||
state = STATE_PROBING;
|
||||
|
||||
bootstrap();
|
||||
|
||||
if (!elector.peer_tracker_is_clean()){
|
||||
dout(10) << "peer_tracker looks inconsistent"
|
||||
<< " previous bad logic, clearing ..." << dendl;
|
||||
elector.notify_clear_peer_state();
|
||||
}
|
||||
|
||||
// add features of myself into feature_map
|
||||
session_map.feature_map.add_mon(con_self->get_features());
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user