mirror of
https://github.com/ceph/ceph
synced 2025-01-31 23:44:10 +00:00
Merge PR #25849 into master
* refs/pull/25849/head: qa/suites/rados/upgrade: one mon per node, and enable-msgr2 at end qa/rados/thrash-old-clients: avoid msgr2 mon: make bootstrap rank check more robust mon: clean up probe debug output a bit msg/async: use v1 for v1 <-> [v2,v1] peers msg/async/AsyncMessenger: drop single-use _send_to mon/HealthMonitor: raise MON_MSGR2_NOT_ENABLED if mons not bound to msgr2 doc/rados/operations/health-checks: document MON_* health warnings mon/MonMapMonitor: add 'mon enable-msgr2' command mon: respawn if rank addr changes mon/MonMap: calc_addr_mons() after setting rank addrvec Reviewed-by: Ricardo Dias <rdias@suse.com>
This commit is contained in:
commit
b5e5ee6f40
@ -21,6 +21,57 @@ that are defined by ceph-mgr python modules.
|
||||
Definitions
|
||||
===========
|
||||
|
||||
Monitor
|
||||
-------
|
||||
|
||||
MON_DOWN
|
||||
________
|
||||
|
||||
One or more monitor daemons are currently down. The cluster requires a
|
||||
majority (more than 1/2) of the monitors in order to function. When
|
||||
one or more monitors are down, clients may have a harder time forming
|
||||
their initial connection to the cluster as they may need to try more
|
||||
addresses before they reach an operating monitor.
|
||||
|
||||
The down monitor daemon should generally be restarted as soon as
|
||||
possible to reduce the risk of a subsequent monitor failure leading to
|
||||
a service outage.
|
||||
|
||||
MON_CLOCK_SKEW
|
||||
______________
|
||||
|
||||
The clocks on the hosts running the ceph-mon monitor daemons are not
|
||||
sufficiently well synchronized. This health alert is raised if the
|
||||
cluster detects a clock skew greater than ``mon_clock_drift_allowed``.
|
||||
|
||||
This is best resolved by synchronizing the clocks using a tool like
|
||||
``ntpd`` or ``chrony``.
|
||||
|
||||
If it is impractical to keep the clocks closely synchronized, the
|
||||
``mon_clock_drift_allowed`` threshold can also be increased, but this
|
||||
value must stay significantly below the ``mon_lease`` interval in
|
||||
order for the monitor cluster to function properly.
|
||||
|
||||
MON_MSGR2_NOT_ENABLED
|
||||
_____________________
|
||||
|
||||
The ``ms_bind_msgr2`` option is enabled but one or more monitors are
|
||||
not configured to bind to a v2 port in the cluster's monmap. This
|
||||
means that features specific to the msgr2 protocol (e.g., encryption)
|
||||
are not available on some or all connections.
|
||||
|
||||
In most cases this can be corrected by issuing the command::
|
||||
|
||||
ceph mon enable-msgr2
|
||||
|
||||
That command will change any monitor configured for the old default
|
||||
port 6789 to continue to listen for v1 connections on 6789 and also
|
||||
listen for v2 connections on the new default 3300 port.
|
||||
|
||||
If a monitor is configured to listen for v1 connections on a non-standard port (not 6789), then the monmap will need to be modified manually.
|
||||
|
||||
|
||||
|
||||
Manager
|
||||
-------
|
||||
|
||||
|
@ -1,3 +1,7 @@
|
||||
tasks:
|
||||
- ceph:
|
||||
mon_bind_addrvec: false
|
||||
mon_bind_msgr2: false
|
||||
conf:
|
||||
global:
|
||||
ms bind msgr2: false
|
||||
|
@ -11,20 +11,27 @@ overrides:
|
||||
conf:
|
||||
global:
|
||||
ms dump corrupt message level: 0
|
||||
ms bind msgr2: false
|
||||
mds:
|
||||
debug ms: 1
|
||||
debug mds: 20
|
||||
roles:
|
||||
- - mon.a
|
||||
- mon.c
|
||||
- mgr.x
|
||||
- mgr.y
|
||||
- mds.a
|
||||
- osd.0
|
||||
- osd.1
|
||||
- osd.2
|
||||
- - mon.b
|
||||
- osd.3
|
||||
- - mon.b
|
||||
- osd.4
|
||||
- osd.5
|
||||
- osd.6
|
||||
- osd.7
|
||||
- - mon.c
|
||||
- mgr.y
|
||||
- osd.8
|
||||
- osd.9
|
||||
- osd.10
|
||||
- osd.11
|
||||
- - client.0
|
||||
|
@ -5,13 +5,14 @@ meta:
|
||||
restart : osd.0,1,2,3,4,5
|
||||
tasks:
|
||||
- install.upgrade:
|
||||
osd.0:
|
||||
mon.a:
|
||||
mon.b:
|
||||
- print: "**** done install.upgrade osd.0"
|
||||
- ceph.restart:
|
||||
daemons: [mon.a, mon.c]
|
||||
daemons: [mon.a, mon.b]
|
||||
wait-for-healthy: false
|
||||
mon-health-to-clog: false
|
||||
- ceph.restart:
|
||||
daemons: [osd.0, osd.1, osd.2]
|
||||
daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6, osd.7]
|
||||
wait-for-healthy: false
|
||||
- print: "**** done ceph.restart 1st half"
|
||||
- print: "**** done ceph.restart 1st 2/3s"
|
||||
|
@ -10,18 +10,21 @@ overrides:
|
||||
- \(MDS_
|
||||
tasks:
|
||||
- install.upgrade:
|
||||
osd.3:
|
||||
mon.c:
|
||||
- ceph.restart:
|
||||
daemons: [mon.b, mgr.x, mgr.y]
|
||||
daemons: [mon.c, mgr.x, mgr.y]
|
||||
wait-for-up: true
|
||||
wait-for-healthy: false
|
||||
- ceph.restart:
|
||||
daemons: [osd.3, osd.4, osd.5]
|
||||
daemons: [osd.8, osd.9, osd.10, osd.11]
|
||||
wait-for-up: true
|
||||
wait-for-healthy: false
|
||||
- ceph.restart:
|
||||
daemons: [mds.a]
|
||||
wait-for-up: true
|
||||
wait-for-healthy: false
|
||||
- exec:
|
||||
mon.a:
|
||||
- ceph mon enable-msgr2
|
||||
- install.upgrade:
|
||||
client.0:
|
||||
|
@ -801,6 +801,9 @@ int main(int argc, const char **argv)
|
||||
mon = new Monitor(g_ceph_context, g_conf()->name.get_id(), store,
|
||||
msgr, mgr_msgr, &monmap);
|
||||
|
||||
mon->orig_argc = argc;
|
||||
mon->orig_argv = argv;
|
||||
|
||||
if (force_sync) {
|
||||
derr << "flagging a forced sync ..." << dendl;
|
||||
ostringstream oss;
|
||||
|
@ -366,6 +366,27 @@ bool HealthMonitor::check_leader_health()
|
||||
}
|
||||
}
|
||||
|
||||
// MON_MSGR2_NOT_ENABLED
|
||||
if (g_conf().get_val<bool>("ms_bind_msgr2") &&
|
||||
mon->monmap->get_required_features().contains_all(
|
||||
ceph::features::mon::FEATURE_NAUTILUS)) {
|
||||
list<string> details;
|
||||
for (auto& i : mon->monmap->mon_info) {
|
||||
if (!i.second.public_addrs.has_msgr2()) {
|
||||
ostringstream ds;
|
||||
ds << "mon." << i.first << " is not bound to a msgr2 port, only "
|
||||
<< i.second.public_addrs;
|
||||
details.push_back(ds.str());
|
||||
}
|
||||
}
|
||||
if (!details.empty()) {
|
||||
ostringstream ss;
|
||||
ss << details.size() << " monitors have not enabled msgr2";
|
||||
auto& d = next.add("MON_MSGR2_NOT_ENABLED", HEALTH_WARN, ss.str());
|
||||
d.detail.swap(details);
|
||||
}
|
||||
}
|
||||
|
||||
if (next != leader_checks) {
|
||||
changed = true;
|
||||
leader_checks = next;
|
||||
|
@ -451,6 +451,9 @@ COMMAND("mon set-rank " \
|
||||
"name=rank,type=CephInt",
|
||||
"set the rank for the specified mon",
|
||||
"mon", "rw")
|
||||
COMMAND("mon enable-msgr2",
|
||||
"enable the msgr2 protocol on port 3300",
|
||||
"mon", "rw")
|
||||
|
||||
/*
|
||||
* OSD commands
|
||||
|
@ -334,10 +334,17 @@ public:
|
||||
}
|
||||
int get_rank(const entity_addr_t& a) const {
|
||||
string n = get_name(a);
|
||||
if (n.empty())
|
||||
return -1;
|
||||
|
||||
return get_rank(n);
|
||||
if (!n.empty()) {
|
||||
return get_rank(n);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
// Rank lookup by address vector: resolves av to a monitor name via
// get_name() and returns that name's rank.  Returns -1 when the name comes
// back empty.
int get_rank(const entity_addrvec_t& av) const {
|
||||
string n = get_name(av);
|
||||
if (!n.empty()) {
|
||||
return get_rank(n);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
bool get_addr_name(const entity_addr_t& a, string& name) {
|
||||
if (addr_mons.count(a) == 0)
|
||||
@ -358,6 +365,7 @@ public:
|
||||
// Replace the public address vector of monitor n.  n must already be present
// in mon_info (asserted).  calc_addr_mons() is called afterwards; presumably
// it rebuilds the address lookup so it reflects the new addresses -- confirm
// against its definition.
void set_addrvec(const string& n, const entity_addrvec_t& a) {
|
||||
ceph_assert(mon_info.count(n));
|
||||
mon_info[n].public_addrs = a;
|
||||
calc_addr_mons();
|
||||
}
|
||||
|
||||
void encode(bufferlist& blist, uint64_t con_features) const;
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "common/version.h"
|
||||
#include "common/blkdev.h"
|
||||
#include "common/cmdparse.h"
|
||||
#include "common/signal.h"
|
||||
|
||||
#include "osd/OSDMap.h"
|
||||
|
||||
@ -1029,6 +1030,54 @@ void Monitor::wait_for_paxos_write()
|
||||
}
|
||||
}
|
||||
|
||||
// Re-execute this process with the argument vector saved in
// orig_argc/orig_argv.  The arguments are copied into a NULL-terminated
// array, an executable path is chosen (the /proc/self/exe link when
// PROCPREFIX is defined and the link is readable, otherwise orig_argv[0]),
// all signals are unblocked and execv() is called.  If execv() returns, the
// failure is logged and the process aborts, so this function never returns
// to its caller.
void Monitor::respawn()
|
||||
{
|
||||
dout(0) << __func__ << dendl;
|
||||
|
||||
char *new_argv[orig_argc+1];
|
||||
dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
|
||||
for (int i=0; i<orig_argc; i++) {
|
||||
new_argv[i] = (char *)orig_argv[i];
|
||||
dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
|
||||
}
|
||||
new_argv[orig_argc] = NULL;
|
||||
|
||||
/* Determine the path to our executable, test if Linux /proc/self/exe exists.
|
||||
* This allows us to exec the same executable even if it has since been
|
||||
* unlinked.
|
||||
*/
|
||||
char exe_path[PATH_MAX] = "";
|
||||
#ifdef PROCPREFIX
|
||||
if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) != -1) {
|
||||
dout(1) << "respawning with exe " << exe_path << dendl;
|
||||
strcpy(exe_path, PROCPREFIX "/proc/self/exe");
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
/* Print CWD for the user's interest */
|
||||
char buf[PATH_MAX];
|
||||
char *cwd = getcwd(buf, sizeof(buf));
|
||||
ceph_assert(cwd);
|
||||
dout(1) << " cwd " << cwd << dendl;
|
||||
|
||||
/* Fall back to a best-effort: just running in our CWD */
|
||||
strncpy(exe_path, orig_argv[0], PATH_MAX-1);
|
||||
}
|
||||
|
||||
dout(1) << " exe_path " << exe_path << dendl;
|
||||
|
||||
unblock_all_signals(NULL);
|
||||
execv(exe_path, new_argv);
|
||||
|
||||
dout(0) << "respawn execv " << orig_argv[0]
|
||||
<< " failed with " << cpp_strerror(errno) << dendl;
|
||||
|
||||
// We have to assert out here, because suicide() returns, and callers
|
||||
// to respawn expect it never to return.
|
||||
ceph_abort();
|
||||
}
|
||||
|
||||
void Monitor::bootstrap()
|
||||
{
|
||||
dout(10) << "bootstrap" << dendl;
|
||||
@ -1045,7 +1094,7 @@ void Monitor::bootstrap()
|
||||
dout(10) << "monmap " << *monmap << dendl;
|
||||
|
||||
// note my rank
|
||||
int newrank = monmap->get_rank(messenger->get_myaddr());
|
||||
int newrank = monmap->get_rank(messenger->get_myaddrs());
|
||||
if (newrank < 0 && rank >= 0) {
|
||||
// was i ever part of the quorum?
|
||||
if (has_ever_joined) {
|
||||
@ -1053,6 +1102,14 @@ void Monitor::bootstrap()
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
if (newrank >= 0 &&
|
||||
monmap->get_addrs(newrank) != messenger->get_myaddrs()) {
|
||||
dout(0) << " monmap addrs for rank " << newrank << " changed, i am "
|
||||
<< messenger->get_myaddrs()
|
||||
<< ", monmap is " << monmap->get_addrs(newrank) << ", respawning"
|
||||
<< dendl;
|
||||
respawn();
|
||||
}
|
||||
if (newrank != rank) {
|
||||
dout(0) << " my rank is now " << newrank << " (was " << rank << ")" << dendl;
|
||||
messenger->set_myname(entity_name_t::MON(newrank));
|
||||
@ -1799,7 +1856,8 @@ void Monitor::handle_probe_probe(MonOpRequestRef op)
|
||||
void Monitor::handle_probe_reply(MonOpRequestRef op)
|
||||
{
|
||||
MMonProbe *m = static_cast<MMonProbe*>(op->get_req());
|
||||
dout(10) << "handle_probe_reply " << m->get_source_inst() << *m << dendl;
|
||||
dout(10) << "handle_probe_reply " << m->get_source_inst()
|
||||
<< " " << *m << dendl;
|
||||
dout(10) << " monmap is " << *monmap << dendl;
|
||||
|
||||
// discover name and addrs during probing or electing states.
|
||||
@ -1840,8 +1898,10 @@ void Monitor::handle_probe_reply(MonOpRequestRef op)
|
||||
bootstrap();
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
} else if (peer_name.size()) {
|
||||
dout(10) << " peer name is " << peer_name << dendl;
|
||||
} else {
|
||||
dout(10) << " peer " << m->get_source_addr() << " not in map" << dendl;
|
||||
}
|
||||
|
||||
// new initial peer?
|
||||
|
@ -125,6 +125,9 @@ public:
|
||||
class Monitor : public Dispatcher,
|
||||
public md_config_obs_t {
|
||||
public:
|
||||
int orig_argc = 0;
|
||||
const char **orig_argv = nullptr;
|
||||
|
||||
// me
|
||||
string name;
|
||||
int rank;
|
||||
@ -593,6 +596,7 @@ private:
|
||||
void _reset(); ///< called from bootstrap, start_, or join_election
|
||||
void wait_for_paxos_write();
|
||||
void _finish_svc_election(); ///< called by {win,lose}_election
|
||||
void respawn();
|
||||
public:
|
||||
void bootstrap();
|
||||
void join_election();
|
||||
|
@ -732,6 +732,32 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op)
|
||||
pending_map.set_rank(name, rank);
|
||||
pending_map.last_changed = ceph_clock_now();
|
||||
propose = true;
|
||||
} else if (prefix == "mon enable-msgr2") {
|
||||
if (!monmap.get_required_features().contains_all(
|
||||
ceph::features::mon::FEATURE_NAUTILUS)) {
|
||||
err = -EACCES;
|
||||
ss << "all monitors must be running nautilus to enable v2";
|
||||
goto reply;
|
||||
}
|
||||
for (auto& i : pending_map.mon_info) {
|
||||
if (i.second.public_addrs.v.size() == 1 &&
|
||||
i.second.public_addrs.front().is_legacy() &&
|
||||
i.second.public_addrs.front().get_port() == CEPH_MON_PORT_LEGACY) {
|
||||
entity_addrvec_t av;
|
||||
entity_addr_t a = i.second.public_addrs.front();
|
||||
a.set_type(entity_addr_t::TYPE_MSGR2);
|
||||
a.set_port(CEPH_MON_PORT_IANA);
|
||||
av.v.push_back(a);
|
||||
av.v.push_back(i.second.public_addrs.front());
|
||||
dout(10) << " setting mon." << i.first
|
||||
<< " addrs " << i.second.public_addrs
|
||||
<< " -> " << av << dendl;
|
||||
pending_map.set_addrvec(i.first, av);
|
||||
propose = true;
|
||||
pending_map.last_changed = ceph_clock_now();
|
||||
}
|
||||
}
|
||||
err = 0;
|
||||
} else {
|
||||
ss << "unknown command " << prefix;
|
||||
err = -EINVAL;
|
||||
|
@ -601,34 +601,42 @@ AsyncConnectionRef AsyncMessenger::create_connect(
|
||||
return conn;
|
||||
}
|
||||
|
||||
ConnectionRef AsyncMessenger::connect_to(int type, const entity_addrvec_t& addrs)
|
||||
{
|
||||
Mutex::Locker l(lock);
|
||||
if (*my_addrs == addrs ||
|
||||
(addrs.v.size() == 1 &&
|
||||
my_addrs->contains(addrs.front()))) {
|
||||
// local
|
||||
return local_connection;
|
||||
}
|
||||
|
||||
AsyncConnectionRef conn = _lookup_conn(addrs);
|
||||
if (conn) {
|
||||
ldout(cct, 10) << __func__ << " " << addrs << " existing " << conn << dendl;
|
||||
} else {
|
||||
conn = create_connect(addrs, type);
|
||||
ldout(cct, 10) << __func__ << " " << addrs << " new " << conn << dendl;
|
||||
}
|
||||
|
||||
return conn;
|
||||
}
|
||||
|
||||
ConnectionRef AsyncMessenger::get_loopback_connection()
|
||||
{
|
||||
return local_connection;
|
||||
}
|
||||
|
||||
int AsyncMessenger::_send_to(Message *m, int type, const entity_addrvec_t& addrs)
|
||||
// Choose which of a destination's addresses this messenger should use.
// When this messenger has bound (did_bind), none of its own addresses is
// msgr2, and the destination type equals our own type, the result is addrs
// with every msgr2 entry removed.  In all other cases addrs is returned
// unchanged.
entity_addrvec_t AsyncMessenger::_filter_addrs(int type,
|
||||
const entity_addrvec_t& addrs)
|
||||
{
|
||||
if (did_bind &&
|
||||
!get_myaddrs().has_msgr2() &&
|
||||
get_mytype() == type) {
|
||||
// if we are bound to v1 only, and we are connecting to a peer, we cannot
|
||||
// use the peer's v2 address (yet). otherwise the connection is asymmetrical,
|
||||
// because they would have to use v1 to connect to us, and we would use v2,
|
||||
// and connection race detection etc would totally break down (among other
|
||||
// things).
|
||||
ldout(cct, 10) << __func__ << " " << addrs << " type " << type
|
||||
<< " limiting to v1 ()" << dendl;
|
||||
entity_addrvec_t r;
|
||||
for (auto& i : addrs.v) {
|
||||
if (i.is_msgr2()) {
|
||||
continue;
|
||||
}
|
||||
r.v.push_back(i);
|
||||
}
|
||||
return r;
|
||||
} else {
|
||||
return addrs;
|
||||
}
|
||||
}
|
||||
|
||||
int AsyncMessenger::send_to(Message *m, int type, const entity_addrvec_t& addrs)
|
||||
{
|
||||
Mutex::Locker l(lock);
|
||||
|
||||
FUNCTRACE(cct);
|
||||
ceph_assert(m);
|
||||
|
||||
@ -648,11 +656,35 @@ int AsyncMessenger::_send_to(Message *m, int type, const entity_addrvec_t& addrs
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
AsyncConnectionRef conn = _lookup_conn(addrs);
|
||||
submit_message(m, conn, addrs, type);
|
||||
auto av = _filter_addrs(type, addrs);
|
||||
AsyncConnectionRef conn = _lookup_conn(av);
|
||||
submit_message(m, conn, av, type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ConnectionRef AsyncMessenger::connect_to(int type, const entity_addrvec_t& addrs)
|
||||
{
|
||||
Mutex::Locker l(lock);
|
||||
if (*my_addrs == addrs ||
|
||||
(addrs.v.size() == 1 &&
|
||||
my_addrs->contains(addrs.front()))) {
|
||||
// local
|
||||
return local_connection;
|
||||
}
|
||||
|
||||
auto av = _filter_addrs(type, addrs);
|
||||
|
||||
AsyncConnectionRef conn = _lookup_conn(av);
|
||||
if (conn) {
|
||||
ldout(cct, 10) << __func__ << " " << av << " existing " << conn << dendl;
|
||||
} else {
|
||||
conn = create_connect(av, type);
|
||||
ldout(cct, 10) << __func__ << " " << av << " new " << conn << dendl;
|
||||
}
|
||||
|
||||
return conn;
|
||||
}
|
||||
|
||||
void AsyncMessenger::submit_message(Message *m, AsyncConnectionRef con,
|
||||
const entity_addrvec_t& dest_addrs,
|
||||
int dest_type)
|
||||
|
@ -137,11 +137,7 @@ public:
|
||||
* @defgroup Messaging
|
||||
* @{
|
||||
*/
|
||||
int send_to(Message *m, int type, const entity_addrvec_t& addrs) override {
|
||||
Mutex::Locker l(lock);
|
||||
|
||||
return _send_to(m, type, addrs);
|
||||
}
|
||||
int send_to(Message *m, int type, const entity_addrvec_t& addrs) override;
|
||||
|
||||
/** @} // Messaging */
|
||||
|
||||
@ -216,10 +212,12 @@ private:
|
||||
void submit_message(Message *m, AsyncConnectionRef con,
|
||||
const entity_addrvec_t& dest_addrs, int dest_type);
|
||||
|
||||
int _send_to(Message *m, int type, const entity_addrvec_t& addrs);
|
||||
void _finish_bind(const entity_addrvec_t& bind_addrs,
|
||||
const entity_addrvec_t& listen_addrs);
|
||||
|
||||
entity_addrvec_t _filter_addrs(int type,
|
||||
const entity_addrvec_t& addrs);
|
||||
|
||||
private:
|
||||
static const uint64_t ReapDeadConnectionThreshold = 5;
|
||||
|
||||
|
@ -609,6 +609,14 @@ struct entity_addrvec_t {
|
||||
}
|
||||
return entity_addr_t();
|
||||
}
|
||||
// Returns true if at least one address in v reports is_msgr2(); returns
// false otherwise, including when v is empty.
bool has_msgr2() const {
|
||||
for (auto& a : v) {
|
||||
if (a.is_msgr2()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool parse(const char *s, const char **end = 0);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user