mon: clear connection score during update & add sanity check live/dead connection report

When upgrading the monitors (include booting up),
we check if `peer_tracker` is dirty or not. If
so, we clear it. Added some functions in `Elector` and
`ConnectionTracker` class to
check for clean `peer_tracker`.

Moreover, there could be some cases where due
to startup weirdness or abnormal circumstances,
we might get a report from our own rank. Therefore,
it doesn't hurt to add a sanity check in
`ConnectionTracker::report_live_connection` and
`ConnectionTracker::report_dead_connection`.

Fixes: https://tracker.ceph.com/issues/58049

Signed-off-by: Kamoltat <ksirivad@redhat.com>
This commit is contained in:
Kamoltat 2022-11-11 22:56:46 +00:00
parent 8efc19911b
commit 767a4be12d
5 changed files with 50 additions and 0 deletions

View File

@ -106,6 +106,10 @@ void ConnectionTracker::report_live_connection(int peer_rank, double units_alive
{
ldout(cct, 30) << __func__ << " peer_rank: " << peer_rank << " units_alive: " << units_alive << dendl;
ldout(cct, 30) << "my_reports before: " << my_reports << dendl;
if (peer_rank == rank) {
lderr(cct) << "Got a report from my own rank, hopefully this is startup weirdness, dropping" << dendl;
return;
}
// we need to "auto-initialize" to 1, do shenanigans
auto i = my_reports.history.find(peer_rank);
if (i == my_reports.history.end()) {
@ -130,6 +134,10 @@ void ConnectionTracker::report_dead_connection(int peer_rank, double units_dead)
{
ldout(cct, 30) << __func__ << " peer_rank: " << peer_rank << " units_dead: " << units_dead << dendl;
ldout(cct, 30) << "my_reports before: " << my_reports << dendl;
if (peer_rank == rank) {
lderr(cct) << "Got a report from my own rank, hopefully this is startup weirdness, dropping" << dendl;
return;
}
// we need to "auto-initialize" to 1, do shenanigans
auto i = my_reports.history.find(peer_rank);
if (i == my_reports.history.end()) {
@ -246,6 +254,22 @@ void ConnectionTracker::notify_rank_removed(int rank_removed, int new_rank)
increase_version();
}
bool ConnectionTracker::is_clean(int mon_rank, int monmap_size)
{
ldout(cct, 30) << __func__ << dendl;
// check consistency between our rank according
// to monmap and our rank according to our report.
if (rank != mon_rank ||
my_reports.rank != mon_rank) {
return false;
} else if (!peer_reports.empty()){
// if peer_report max rank is greater than monmap max rank
// then there is a problem.
if (peer_reports.rbegin()->first > monmap_size - 1) return false;
}
return true;
}
void ConnectionTracker::encode(bufferlist &bl) const
{
ENCODE_START(1, 1, bl);

View File

@ -120,6 +120,13 @@ class ConnectionTracker {
*/
void get_total_connection_score(int peer_rank, double *rating,
int *live_count) const;
/**
* Check if our ranks are clean and make
* sure there are no extra peer_report lingering.
* In the future we also want to check the reports
* current and history of each peer_report.
*/
bool is_clean(int mon_rank, int monmap_size);
/**
* Encode this ConnectionTracker. Useful both for storing on disk
* and for sending off to peers for decoding and import
@ -185,6 +192,7 @@ class ConnectionTracker {
rank = new_rank;
my_reports.rank = rank;
}
void notify_rank_changed(int new_rank);
void notify_rank_removed(int rank_removed, int new_rank);
friend std::ostream& operator<<(std::ostream& o, const ConnectionTracker& c);

View File

@ -716,6 +716,11 @@ void Elector::start_participating()
logic.participating = true;
}
bool Elector::peer_tracker_is_clean()
{
return peer_tracker.is_clean(mon->rank, paxos_size());
}
void Elector::notify_clear_peer_state()
{
dout(10) << __func__ << dendl;

View File

@ -357,6 +357,11 @@ class Elector : public ElectionOwner, RankProvider {
* @post @p participating is true
*/
void start_participating();
/**
* Check if our peer_tracker is self-consistent, not suffering from
* https://tracker.ceph.com/issues/58049
*/
bool peer_tracker_is_clean();
/**
* Forget everything about our peers. :(
*/

View File

@ -943,7 +943,15 @@ int Monitor::init()
osdmon()->get_filestore_osd_list();
state = STATE_PROBING;
bootstrap();
if (!elector.peer_tracker_is_clean()){
dout(10) << "peer_tracker looks inconsistent"
<< " previous bad logic, clearing ..." << dendl;
elector.notify_clear_peer_state();
}
// add features of myself into feature_map
session_map.feature_map.add_mon(con_self->get_features());
return 0;