From beb12fa25315153e1a06a0104883de89776438a6 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Wed, 18 Mar 2020 03:25:47 -0400 Subject: [PATCH] mds: do not defer incoming mgrmap when mds is laggy When the mds is laggy, the incoming mgrmap is queued to be processed at a later stage. But, the mds does not handle mgrmap message directly. So, later when the mds is not laggy anymore, the mgrmap message is not handled and is dropped. But, when the mgrmap message was queued up, the mds acknowledges that it has handled the message. This causes the mgr client instance to never process the mgrmap and never connecting to the manager (the receipt of mgrmap drives the connection to the manager). The fix is to not acknowledge messages that the mds cannot handle. In normal cases, the mds does not ack the message but when it's laggy, it just blindly queues up the message -- so, check if the message can be handled (later) even when the mds is laggy. Also, a minor change in a function name -- handle_deferrable_message() is kind of a misnomer since the function is called to process messages that are not deferred. That's changed to handle_message() now. Fixes: http://tracker.ceph.com/issues/44638 Signed-off-by: Venky Shankar --- src/mds/MDSDaemon.cc | 11 +++++++++ src/mds/MDSRank.cc | 55 +++++++++++++++++++++++++++++++++++--------- src/mds/MDSRank.h | 13 ++--------- 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index e7b1f51001e..d0ff38a3ef0 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -904,6 +904,17 @@ bool MDSDaemon::ms_dispatch2(const ref_t &m) /* * high priority messages we always process */ + +#define ALLOW_MESSAGES_FROM(peers) \ + do { \ + if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \ + dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" \ + << m->get_connection()->get_peer_type() << " allowing=" \ + << #peers << " message=" << *m << dendl; \ + return true; \ + } \ + } while (0) + bool MDSDaemon::handle_core_message(const cref_t &m) { switch (m->get_type()) { diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 6c18068893d..ce64fa0a5a8 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1010,6 +1010,10 @@ bool MDSRank::_dispatch(const cref_t &m, bool new_msg) if (is_stale_message(m)) { return true; } + // do not proceed if this message cannot be handled + if (!is_valid_message(m)) { + return false; + } if (beacon.is_laggy()) { dout(5) << " laggy, deferring " << *m << dendl; @@ -1018,10 +1022,7 @@ bool MDSRank::_dispatch(const cref_t &m, bool new_msg) dout(5) << " there are deferred messages, deferring " << *m << dendl; waiting_for_nolaggy.push_back(m); } else { - if (!handle_deferrable_message(m)) { - return false; - } - + handle_message(m); heartbeat_reset(); } @@ -1132,10 +1133,45 @@ void MDSRank::update_mlogger() } } +// message types that the mds can handle +bool MDSRank::is_valid_message(const cref_t &m) { + int port = m->get_type() & 0xff00; + int type = m->get_type(); + + if (port == MDS_PORT_CACHE || + port == MDS_PORT_MIGRATOR || + type == CEPH_MSG_CLIENT_SESSION || + type == CEPH_MSG_CLIENT_RECONNECT || + type == CEPH_MSG_CLIENT_RECLAIM || + type == CEPH_MSG_CLIENT_REQUEST || + type == MSG_MDS_SLAVE_REQUEST || + type == MSG_MDS_HEARTBEAT || + type == MSG_MDS_TABLE_REQUEST || + type == MSG_MDS_LOCK || + type == MSG_MDS_INODEFILECAPS || + type == CEPH_MSG_CLIENT_CAPS || + type == CEPH_MSG_CLIENT_CAPRELEASE || + type == CEPH_MSG_CLIENT_LEASE) { + return true; + } + + return false; +} + /* * lower priority messages we defer if we seem laggy */ -bool MDSRank::handle_deferrable_message(const cref_t &m) + +#define ALLOW_MESSAGES_FROM(peers) \ + do { \ + if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \ + dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" << m->get_connection()->get_peer_type() \ + << " allowing=" << #peers << " message=" << *m << dendl; \ + return; \ + } \ + } while (0) + +void MDSRank::handle_message(const cref_t &m) { int port = m->get_type() & 0xff00; @@ -1199,11 +1235,9 @@ bool MDSRank::handle_deferrable_message(const cref_t &m) break; default: - return false; + derr << "unrecogonized message " << *m << dendl; } } - - return true; } /** @@ -1239,9 +1273,8 @@ void MDSRank::_advance_queues() if (!is_stale_message(old)) { dout(7) << " processing laggy deferred " << *old << dendl; - if (!handle_deferrable_message(old)) { - dout(0) << "unrecognized message " << *old << dendl; - } + ceph_assert(is_valid_message(old)); + handle_message(old); } heartbeat_reset(); diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 9367034b6bd..c0605be626a 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -421,7 +421,8 @@ class MDSRank { void inc_dispatch_depth() { ++dispatch_depth; } void dec_dispatch_depth() { --dispatch_depth; } void retry_dispatch(const cref_t &m); - bool handle_deferrable_message(const cref_t &m); + bool is_valid_message(const cref_t &m); + void handle_message(const cref_t &m); void _advance_queues(); bool _dispatch(const cref_t &m, bool new_msg); bool is_stale_message(const cref_t &m) const; @@ -653,15 +654,5 @@ public: bool ms_dispatch(const cref_t &m); }; -// This utility for MDS and MDSRank dispatchers. -#define ALLOW_MESSAGES_FROM(peers) \ -do { \ - if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \ - dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" << m->get_connection()->get_peer_type() \ - << " allowing=" << #peers << " message=" << *m << dendl; \ - return true; \ - } \ -} while (0) - #endif // MDS_RANK_H_