Elector: send an OP_NAK MMonElection to old peers who support it

Only new monitors support receiving OP_NAK from a peer without crashing, but
when we add new required features in the future, our monitors can accept
an OP_NAK message which tells them what features they're missing. Then they
will print out an error message and shut down.
(Unfortunately, doing a clean shutdown from here would require a lot of
infrastructure, so we just call exit(0).)

Signed-off-by: Greg Farnum <greg@inktank.com>
This commit is contained in:
Greg Farnum 2014-01-23 14:52:40 -08:00
parent 687b570b71
commit 3d4a6739f2
4 changed files with 76 additions and 12 deletions

View File

@ -44,7 +44,9 @@
#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */
#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) /* supports new-style OSDMap encoding */
/* The process supports new-style OSDMap encoding. Monitors also use
this bit to determine if peers support NAK messages. */
#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39)
/*
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature

View File

@ -45,11 +45,11 @@ public:
bufferlist monmap_bl;
set<int> quorum;
uint64_t quorum_features;
bufferlist sharing_bl;
/* the following were both used in the next branch for a while
* on user cluster, so we've left them in for compatibility. */
version_t defunct_one;
version_t defunct_two;
bufferlist commands;
MMonElection() : Message(MSG_MON_ELECTION, HEAD_VERSION, COMPAT_VERSION),
op(0), epoch(0), quorum_features(0), defunct_one(0),
@ -91,7 +91,7 @@ public:
::encode(quorum_features, payload);
::encode(defunct_one, payload);
::encode(defunct_two, payload);
::encode(commands, payload);
::encode(sharing_bl, payload);
}
void decode_payload() {
bufferlist::iterator p = payload.begin();
@ -112,7 +112,7 @@ public:
::decode(defunct_two, p);
}
if (header.version >= 5)
::decode(commands, p);
::decode(sharing_bl, p);
}
};

View File

@ -111,7 +111,7 @@ void Elector::defer(int who)
leader_acked = who;
ack_stamp = ceph_clock_now(g_ceph_context);
MMonElection *m = new MMonElection(MMonElection::OP_ACK, epoch, mon->monmap);
m->commands = mon->get_supported_commands_bl();
m->sharing_bl = mon->get_supported_commands_bl();
mon->messenger->send_message(m, mon->monmap->get_inst(who));
// set a timer
@ -196,7 +196,7 @@ void Elector::victory()
MMonElection *m = new MMonElection(MMonElection::OP_VICTORY, epoch, mon->monmap);
m->quorum = quorum;
m->quorum_features = features;
m->commands = *cmds_bl;
m->sharing_bl = *cmds_bl;
mon->messenger->send_message(m, mon->monmap->get_inst(*p));
}
@ -213,8 +213,9 @@ void Elector::handle_propose(MMonElection *m)
assert(m->epoch % 2 == 1); // election
if ((required_features ^ m->get_connection()->get_features()) &
required_features) {
dout(5) << " ignoring propose from mon without required features" << dendl;
m->put();
dout(5) << " ignoring propose from mon" << from
<< " without required features" << dendl;
nak_old_peer(m);
return;
} else if (m->epoch > epoch) {
bump_epoch(m->epoch);
@ -278,7 +279,7 @@ void Elector::handle_ack(MMonElection *m)
if (electing_me) {
// thanks
acked_me[from] = m->get_connection()->get_features();
if (!m->commands.length())
if (!m->sharing_bl.length())
classic_mons.insert(from);
dout(5) << " so far i have " << acked_me << dendl;
@ -324,10 +325,10 @@ void Elector::handle_victory(MMonElection *m)
cancel_timer();
// stash leader's commands
if (m->commands.length()) {
if (m->sharing_bl.length()) {
MonCommand *new_cmds;
int cmdsize;
bufferlist::iterator bi = m->commands.begin();
bufferlist::iterator bi = m->sharing_bl.begin();
MonCommand::decode_array(&new_cmds, &cmdsize, bi);
mon->set_leader_supported_commands(new_cmds, cmdsize);
} else { // they are a legacy monitor; use known legacy command set
@ -340,8 +341,41 @@ void Elector::handle_victory(MMonElection *m)
m->put();
}
/**
 * Answer a proposal from a peer that lacks required features, then drop it.
 *
 * An OP_NAK reply is sent only when the peer's connection advertises
 * CEPH_FEATURE_OSDMAP_ENC; otherwise nothing is sent. The reference held
 * on m is released on every path.
 *
 * @param m The rejected election message; its reference is consumed here.
 */
void Elector::nak_old_peer(MMonElection *m)
{
  uint64_t peer_features = m->get_connection()->get_features();

  // Peers without this feature bit get no reply; just release the message.
  if (!(peer_features & CEPH_FEATURE_OSDMAP_ENC)) {
    m->put();
    return;
  }

  uint64_t needed_features =
    mon->apply_compatset_features_to_quorum_requirements();
  dout(10) << "sending nak to peer " << m->get_source()
           << " that only supports " << peer_features
           << " of the required " << needed_features << dendl;

  // The nak carries the required feature bits plus our encoded mon->features.
  MMonElection *nak = new MMonElection(MMonElection::OP_NAK, m->epoch,
                                       mon->monmap);
  nak->quorum_features = needed_features;
  mon->features.encode(nak->sharing_bl);
  mon->messenger->send_message(nak, m->get_connection());

  m->put();
}
/**
 * React to an OP_NAK: log the feature mismatch and terminate the process.
 *
 * A CompatSet is decoded from m->sharing_bl and passed to
 * Monitor::get_supported_features().unsupported(); the result is written
 * to the error log and the process ends via exit(0). This function does
 * not return, and no clean shutdown is attempted.
 *
 * @param m A message with an operation type of OP_NAK.
 */
void Elector::handle_nak(MMonElection *m)
{
  dout(1) << "handle_nak from " << m->get_source()
          << " quorum_features " << m->quorum_features << dendl;

  // Recover the feature set that the sender encoded into the message.
  CompatSet peer_required;
  bufferlist::iterator cursor = m->sharing_bl.begin();
  peer_required.decode(cursor);

  CompatSet missing =
    Monitor::get_supported_features().unsupported(peer_required);
  derr << "Shutting down because I do not support required monitor features: { "
       << missing << " }" << dendl;

  // Immediate process exit; nothing after this line runs.
  exit(0);
}
void Elector::dispatch(Message *m)
{
@ -422,6 +456,9 @@ void Elector::dispatch(Message *m)
case MMonElection::OP_VICTORY:
handle_victory(em);
return;
case MMonElection::OP_NAK:
handle_nak(em);
return;
default:
assert(0);
}

View File

@ -245,7 +245,7 @@ class Elector {
* @post We sent a message of type OP_VICTORY to each quorum member.
*/
void victory();
/**
* Handle a message from some other node proposing itself to become
* the Leader.
@ -317,6 +317,31 @@ class Elector {
* @param m A message with an operation type of OP_VICTORY
*/
void handle_victory(class MMonElection *m);
/**
* Send a nak to a peer who's out of date, containing information about why.
*
* If we get a message from a peer who can't support the required quorum
* features, we have to ignore them. This function will at least send
* them a message about *why* they're being ignored -- if they're new
* enough to support such a message.
*
* "New enough" is decided by whether the peer's connection advertises
* CEPH_FEATURE_OSDMAP_ENC; peers without that bit are sent nothing.
* The OP_NAK reply carries the required quorum features in
* quorum_features and our encoded monitor feature set in sharing_bl.
*
* @param m A message from a monitor not supporting required features. We
* take ownership of the reference, and release it whether or not a nak
* is sent.
*/
void nak_old_peer(class MMonElection *m);
/**
* Handle a message from some other participant declaring
* we cannot join the quorum.
*
* Apparently the quorum requires some feature that we do not implement. We
* log the features we are missing and then terminate the process with
* exit(0). This is NOT a clean shutdown: no teardown is performed
* before exiting.
*
* @pre Election is on-going.
* @post The process has exited; this function does not return.
*
* @param m A message with an operation type of OP_NAK
*/
void handle_nak(class MMonElection *m);
public:
/**