mon/MgrStatMonitor: ignore MMonMgrReport from non-active mgr

If a report does not come from the active mgr, we should ignore it.

Since the mgr instance is best identified by the gid, add that to the
message.  (We can't use the source_addrs for the message since that is
the MgrStandby monc addr, not the active mgr addrs in the MgrMap.)

This fixes a problem where a report from a just-demoted mgr gets processed
and a new mgr gets a ServiceMap with an epoch >= its pending map.  (At
least, that is my theory!)

Fixes: https://tracker.ceph.com/issues/48022
Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
Sage Weil 2021-03-18 11:45:48 -05:00
parent 3dbc1f0578
commit 4d447092c3
3 changed files with 17 additions and 2 deletions

View File

@ -23,7 +23,7 @@
class MMonMgrReport final : public PaxosServiceMessage {
private:
static constexpr int HEAD_VERSION = 2;
static constexpr int HEAD_VERSION = 3;
static constexpr int COMPAT_VERSION = 1;
public:
@ -31,6 +31,7 @@ public:
health_check_map_t health_checks;
ceph::buffer::list service_map_bl; // encoded ServiceMap
std::map<std::string,ProgressEvent> progress_events;
uint64_t gid = 0;
MMonMgrReport()
: PaxosServiceMessage{MSG_MON_MGR_REPORT, 0, HEAD_VERSION, COMPAT_VERSION}
@ -42,7 +43,8 @@ public:
std::string_view get_type_name() const override { return "monmgrreport"; }
void print(std::ostream& out) const override {
out << get_type_name() << "(" << health_checks.checks.size() << " checks, "
out << get_type_name() << "(gid " << gid
<< ", " << health_checks.checks.size() << " checks, "
<< progress_events.size() << " progress events)";
}
@ -52,6 +54,7 @@ public:
encode(health_checks, payload);
encode(service_map_bl, payload);
encode(progress_events, payload);
encode(gid, payload);
if (!HAVE_FEATURE(features, SERVER_NAUTILUS) ||
!HAVE_FEATURE(features, SERVER_MIMIC)) {
@ -79,6 +82,9 @@ public:
if (header.version >= 2) {
decode(progress_events, p);
}
if (header.version >= 3) {
decode(gid, p);
}
}
private:
template<class T, typename... Args>

View File

@ -2525,6 +2525,7 @@ void DaemonServer::send_report()
}
auto m = ceph::make_message<MMonMgrReport>();
m->gid = monc->get_global_id();
py_modules.get_health_checks(&m->health_checks);
py_modules.get_progress_events(&m->progress_events);

View File

@ -3,6 +3,7 @@
#include "MgrStatMonitor.h"
#include "mon/OSDMonitor.h"
#include "mon/MgrMonitor.h"
#include "mon/PGMap.h"
#include "messages/MGetPoolStats.h"
#include "messages/MGetPoolStatsReply.h"
@ -211,7 +212,14 @@ bool MgrStatMonitor::prepare_update(MonOpRequestRef op)
// Screen an incoming MMonMgrReport before it is handed on for processing.
//
// The op is always marked as needing no reply.  The report is then dropped
// (logged at level 10, return true) when it carries a non-zero gid that
// differs from the active gid recorded in the MgrMonitor's map.  A report
// whose gid is 0, or whose gid matches the active one, is not filtered
// here (return false).
bool MgrStatMonitor::preprocess_report(MonOpRequestRef op)
{
  auto report = op->get_req<MMonMgrReport>();
  mon.no_reply(op);

  // gid == 0: nothing to compare against, so let the report through
  // without consulting the mgr map.
  if (!report->gid) {
    return false;
  }

  // Sender is the mgr currently marked active: let the report through.
  if (report->gid == mon.mgrmon()->get_map().get_active_gid()) {
    return false;
  }

  dout(10) << "ignoring report from non-active mgr " << report->gid
           << dendl;
  return true;
}