mon: let peon mons send the osdmap replies

Currently the leader mon often replies to OSDs by sending a set of
incremental OSDmaps (e.g., in response to an osd boot or failure).

Instead, send a small message to the proxying peon mon (if any)
with the epoch to start from and let *it* generate a suitable
reply.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2015-09-16 21:44:04 -04:00
parent 05aaa60eb5
commit 39e06ef8f0
5 changed files with 52 additions and 15 deletions

View File

@ -71,6 +71,7 @@
#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
#define CEPH_FEATURE_NEW_OSDOP_ENCODING (1ULL<<56) /* New, v7 encoding */
#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
#define CEPH_FEATURE_RESERVED2 (1ULL<<61) /* slow down, we are almost out... */
#define CEPH_FEATURE_RESERVED (1ULL<<62) /* DO NOT USE THIS ... last bit! */
@ -164,6 +165,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
CEPH_FEATURE_OSD_HITSET_GMT | \
CEPH_FEATURE_HAMMER_0_94_4 | \
CEPH_FEATURE_MON_STATEFUL_SUB | \
CEPH_FEATURE_MON_ROUTE_OSDMAP | \
0ULL)
#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL

View File

@ -22,24 +22,35 @@
struct MRoute : public Message {
static const int HEAD_VERSION = 2;
static const int HEAD_VERSION = 3;
static const int COMPAT_VERSION = 2;
uint64_t session_mon_tid;
Message *msg;
entity_inst_t dest;
epoch_t send_osdmap_first;
MRoute() : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), msg(NULL) {}
MRoute() : Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION),
session_mon_tid(0),
msg(NULL),
send_osdmap_first(0) {}
MRoute(uint64_t t, Message *m)
: Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), session_mon_tid(t), msg(m) {}
: Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION),
session_mon_tid(t),
msg(m),
send_osdmap_first(0) {}
MRoute(bufferlist bl, const entity_inst_t& i)
: Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION), session_mon_tid(0), dest(i) {
: Message(MSG_ROUTE, HEAD_VERSION, COMPAT_VERSION),
session_mon_tid(0),
dest(i),
send_osdmap_first(0) {
bufferlist::iterator p = bl.begin();
msg = decode_message(NULL, 0, p);
}
private:
~MRoute() {
if (msg) msg->put();
if (msg)
msg->put();
}
public:
@ -55,23 +66,25 @@ public:
} else {
msg = decode_message(NULL, 0, p);
}
if (header.version >= 3) {
::decode(send_osdmap_first, p);
}
}
void encode_payload(uint64_t features) {
::encode(session_mon_tid, payload);
::encode(dest, payload);
if (features & CEPH_FEATURE_MON_NULLROUTE) {
header.version = HEAD_VERSION;
header.compat_version = COMPAT_VERSION;
bool m = msg ? true : false;
::encode(m, payload);
if (msg)
encode_message(msg, features, payload);
} else {
if ((features & CEPH_FEATURE_MON_NULLROUTE) == 0) {
header.version = 1;
header.compat_version = 1;
assert(msg);
encode_message(msg, features, payload);
return;
}
bool m = msg ? true : false;
::encode(m, payload);
if (msg)
encode_message(msg, features, payload);
::encode(send_osdmap_first, payload);
}
const char *get_type_name() const { return "route"; }
@ -80,6 +93,8 @@ public:
o << "route(" << *msg;
else
o << "route(no-reply";
if (send_osdmap_first)
o << " send_osdmap_first " << send_osdmap_first;
if (session_mon_tid)
o << " tid " << session_mon_tid << ")";
else

View File

@ -3272,6 +3272,11 @@ void Monitor::handle_route(MonOpRequestRef op)
rr->con->send_message(m->msg);
m->msg = NULL;
}
if (m->send_osdmap_first) {
dout(10) << " sending osdmaps from " << m->send_osdmap_first << dendl;
osdmon()->send_incremental(m->send_osdmap_first, rr->session,
true, MonOpRequestRef());
}
routed_requests.erase(m->session_mon_tid);
rr->session->routed_request_tids.insert(rr->tid);
delete rr;

View File

@ -43,6 +43,7 @@
#include "messages/MMonCommand.h"
#include "messages/MRemoveSnaps.h"
#include "messages/MOSDScrub.h"
#include "messages/MRoute.h"
#include "common/TextTable.h"
#include "common/Timer.h"
@ -2400,7 +2401,20 @@ void OSDMonitor::send_incremental(MonOpRequestRef op, epoch_t first)
MonSession *s = op->get_session();
assert(s);
send_incremental(first, s, false, op);
if (s->proxy_con &&
s->proxy_con->has_feature(CEPH_FEATURE_MON_ROUTE_OSDMAP)) {
// oh, we can tell the other mon to do it
dout(10) << __func__ << " asking proxying mon to send_incremental from "
<< first << dendl;
MRoute *r = new MRoute(s->proxy_tid, NULL);
r->send_osdmap_first = first;
s->proxy_con->send_message(r);
op->mark_event("reply: send routed send_osdmap_first reply");
} else {
// do it ourselves
send_incremental(first, s, false, op);
}
}
void OSDMonitor::send_incremental(epoch_t first,

View File

@ -227,11 +227,12 @@ private:
MOSDMap *build_incremental(epoch_t first, epoch_t last);
void send_full(MonOpRequestRef op);
void send_incremental(MonOpRequestRef op, epoch_t first);
public:
// @param req an optional op request, given when the osdmaps are replies to
// it, so that @c Monitor::send_reply() can mark_event on it.
void send_incremental(epoch_t first, MonSession *session, bool onetime,
MonOpRequestRef req = MonOpRequestRef());
private:
int reweight_by_utilization(int oload, std::string& out_str, bool by_pg,
const set<int64_t> *pools);