From 5f0fc0a0913e3e85cb031ae10e8e481d01d4b21d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2019 14:31:13 -0600 Subject: [PATCH 01/11] mon/MonMap: calc_addr_mons() after setting rank addrvec Signed-off-by: Sage Weil --- src/mon/MonMap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mon/MonMap.h b/src/mon/MonMap.h index 7ea959e6737..5a852807ddb 100644 --- a/src/mon/MonMap.h +++ b/src/mon/MonMap.h @@ -351,6 +351,7 @@ public: void set_addrvec(const string& n, const entity_addrvec_t& a) { ceph_assert(mon_info.count(n)); mon_info[n].public_addrs = a; + calc_addr_mons(); } void encode(bufferlist& blist, uint64_t con_features) const; From 2986a1b70ebe6e98b27503f58c2afcc1ee81c32c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2019 16:53:13 -0600 Subject: [PATCH 02/11] mon: respawn if rank addr changes If the address for our rank changes, then respawn! Signed-off-by: Sage Weil --- src/ceph_mon.cc | 3 +++ src/mon/Monitor.cc | 57 ++++++++++++++++++++++++++++++++++++++++++++++ src/mon/Monitor.h | 4 ++++ 3 files changed, 64 insertions(+) diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc index e47cf22cc8c..a932a3b1b67 100644 --- a/src/ceph_mon.cc +++ b/src/ceph_mon.cc @@ -775,6 +775,9 @@ int main(int argc, const char **argv) mon = new Monitor(g_ceph_context, g_conf()->name.get_id(), store, msgr, mgr_msgr, &monmap); + mon->orig_argc = argc; + mon->orig_argv = argv; + if (force_sync) { derr << "flagging a forced sync ..." 
<< dendl; ostringstream oss; diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index eb839bcaf81..880b39541e0 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -30,6 +30,7 @@ #include "common/version.h" #include "common/blkdev.h" #include "common/cmdparse.h" +#include "common/signal.h" #include "osd/OSDMap.h" @@ -1021,6 +1022,54 @@ void Monitor::wait_for_paxos_write() } } +void Monitor::respawn() +{ + dout(0) << __func__ << dendl; + + char *new_argv[orig_argc+1]; + dout(1) << " e: '" << orig_argv[0] << "'" << dendl; + for (int i=0; i= 0 && + monmap->get_addrs(newrank) != messenger->get_myaddrs()) { + dout(0) << " monmap addrs for rank " << newrank << " changed, i am " + << messenger->get_myaddrs() + << ", monmap is " << monmap->get_addrs(newrank) << ", respawning" + << dendl; + respawn(); + } if (newrank != rank) { dout(0) << " my rank is now " << newrank << " (was " << rank << ")" << dendl; messenger->set_myname(entity_name_t::MON(newrank)); diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index a2b17f77fe6..d0c662d7499 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -125,6 +125,9 @@ public: class Monitor : public Dispatcher, public md_config_obs_t { public: + int orig_argc = 0; + const char **orig_argv = nullptr; + // me string name; int rank; @@ -593,6 +596,7 @@ private: void _reset(); ///< called from bootstrap, start_, or join_election void wait_for_paxos_write(); void _finish_svc_election(); ///< called by {win,lose}_election + void respawn(); public: void bootstrap(); void join_election(); From 10a4a7fbda3ceb0f578551ea6e5aa202abdb9ea3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2019 16:54:21 -0600 Subject: [PATCH 03/11] mon/MonMapMonitor: add 'mon enable-msgr2' command This switches any monitor listening on v1:...:6789 only to v2 (on 3300) + v1 (on 6789). It should be run after upgrading all monitors to nautilus. 
Signed-off-by: Sage Weil --- src/mon/MonCommands.h | 3 +++ src/mon/MonmapMonitor.cc | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 7e10e2628f3..cf909f2939a 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -451,6 +451,9 @@ COMMAND("mon set-rank " \ "name=rank,type=CephInt", "set the rank for the specified mon", "mon", "rw") +COMMAND("mon enable-msgr2", + "enable the msgr2 protocol on port 3300", + "mon", "rw") /* * OSD commands diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 97e36e49145..b3bb2f89179 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -732,6 +732,32 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) pending_map.set_rank(name, rank); pending_map.last_changed = ceph_clock_now(); propose = true; + } else if (prefix == "mon enable-msgr2") { + if (!monmap.get_required_features().contains_all( + ceph::features::mon::FEATURE_NAUTILUS)) { + err = -EACCES; + ss << "all monitors must be running nautilus to enable v2"; + goto reply; + } + for (auto& i : pending_map.mon_info) { + if (i.second.public_addrs.v.size() == 1 && + i.second.public_addrs.front().is_legacy() && + i.second.public_addrs.front().get_port() == CEPH_MON_PORT_LEGACY) { + entity_addrvec_t av; + entity_addr_t a = i.second.public_addrs.front(); + a.set_type(entity_addr_t::TYPE_MSGR2); + a.set_port(CEPH_MON_PORT_IANA); + av.v.push_back(a); + av.v.push_back(i.second.public_addrs.front()); + dout(10) << " setting mon." 
<< i.first + << " addrs " << i.second.public_addrs + << " -> " << av << dendl; + pending_map.set_addrvec(i.first, av); + propose = true; + pending_map.last_changed = ceph_clock_now(); + } + } + err = 0; } else { ss << "unknown command " << prefix; err = -EINVAL; From 57c4795c006c1dcac26b645e36c0ec81f820a500 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2019 21:38:42 -0600 Subject: [PATCH 04/11] doc/rados/operations/health-checks: document MON_* health warnings Signed-off-by: Sage Weil --- doc/rados/operations/health-checks.rst | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/doc/rados/operations/health-checks.rst b/doc/rados/operations/health-checks.rst index d43941df8de..d93fc6d2965 100644 --- a/doc/rados/operations/health-checks.rst +++ b/doc/rados/operations/health-checks.rst @@ -21,6 +21,37 @@ that are defined by ceph-mgr python modules. Definitions =========== +Monitor +------- + +MON_DOWN +________ + +One or more monitor daemons are currently down. The cluster requires a +majority (more than 1/2) of the monitors in order to function. When +one or more monitors are down, clients may have a harder time forming +their initial connection to the cluster as they may need to try more +addresses before they reach an operating monitor. + +The down monitor daemon should generally be restarted as soon as +possible to reduce the risk of a subsequent monitor failure leading to +a service outage. + +MON_CLOCK_SKEW +______________ + +The clocks on the hosts running the ceph-mon monitor daemons are not +sufficiently well synchronized. This health alert is raised if the +cluster detects a clock skew greater than ``mon_clock_drift_allowed``. + +This is best resolved by synchronizing the clocks using a tool like +``ntpd`` or ``chrony``. 
+ +If it is impractical to keep the clocks closely synchronized, the +``mon_clock_drift_allowed`` threshold can also be increased, but this +value must stay significantly below the ``mon_lease`` interval in +order for the monitor cluster to function properly. + Manager ------- From 6ba8db68cd1cc8f6fdecda83266f2525f1a8dcd0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2019 21:53:45 -0600 Subject: [PATCH 05/11] mon/HealthMonitor: raise MON_MSGR2_NOT_ENABLED if mons not bound to msgr2 If the ms_bind_msgr2 option is enabled, and all mons are nautilus, raise a health alert if any mons aren't bound to msgr2 addresses. Whitelist tests that mon_bind_addrvec=false or mon_bind_msgr2=false. Signed-off-by: Sage Weil --- doc/rados/operations/health-checks.rst | 20 ++++++++++++++++++ qa/suites/rados/thrash-old-clients/ceph.yaml | 2 ++ .../luminous-x-singleton/0-cluster/start.yaml | 2 ++ src/mon/HealthMonitor.cc | 21 +++++++++++++++++++ src/msg/msg_types.h | 8 +++++++ 5 files changed, 53 insertions(+) diff --git a/doc/rados/operations/health-checks.rst b/doc/rados/operations/health-checks.rst index d93fc6d2965..06a52aedaac 100644 --- a/doc/rados/operations/health-checks.rst +++ b/doc/rados/operations/health-checks.rst @@ -52,6 +52,26 @@ If it is impractical to keep the clocks closely synchronized, the value must stay significantly below the ``mon_lease`` interval in order for the monitor cluster to function properly. +MON_MSGR2_NOT_ENABLED +_____________________ + +The ``ms_bind_msgr2`` option is enabled but one or more monitors are +not configured to bind to a v2 port in the cluster's monmap. This +means that features specific to the msgr2 protocol (e.g., encryption) +are not available on some or all connections. 
+ +In most cases this can be corrected by issuing the command:: + + ceph mon enable-msgr2 + +That command will change any monitor configured for the old default +port 6789 to continue to listen for v1 connections on 6789 and also +listen for v2 connections on the new default 3300 port. + +If a monitor is configured to listen for v1 connections on a non-standard port (not 6789), then the monmap will need to be modified manually. + + + Manager ------- diff --git a/qa/suites/rados/thrash-old-clients/ceph.yaml b/qa/suites/rados/thrash-old-clients/ceph.yaml index 1b9c8568005..b0c28783375 100644 --- a/qa/suites/rados/thrash-old-clients/ceph.yaml +++ b/qa/suites/rados/thrash-old-clients/ceph.yaml @@ -1,3 +1,5 @@ tasks: - ceph: mon_bind_addrvec: false + log-whitelist: + - \(MON_MSGR2_NOT_ENABLED\) diff --git a/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml b/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml index c67ec972d91..308e1c60b23 100644 --- a/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml +++ b/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml @@ -7,6 +7,8 @@ overrides: ceph: mon_bind_addrvec: false mon_bind_msgr2: false + log-whitelist: + - \(MON_MSGR2_NOT_ENABLED\) fs: xfs conf: global: diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc index e255b16e2f1..84594251474 100644 --- a/src/mon/HealthMonitor.cc +++ b/src/mon/HealthMonitor.cc @@ -366,6 +366,27 @@ bool HealthMonitor::check_leader_health() } } + // MON_MSGR2_NOT_ENABLED + if (g_conf().get_val("ms_bind_msgr2") && + mon->monmap->get_required_features().contains_all( + ceph::features::mon::FEATURE_NAUTILUS)) { + list details; + for (auto& i : mon->monmap->mon_info) { + if (!i.second.public_addrs.has_msgr2()) { + ostringstream ds; + ds << "mon." 
<< i.first << " is not bound to a msgr2 port, only " + << i.second.public_addrs; + details.push_back(ds.str()); + } + } + if (!details.empty()) { + ostringstream ss; + ss << details.size() << " monitors have not enabled msgr2"; + auto& d = next.add("MON_MSGR2_NOT_ENABLED", HEALTH_WARN, ss.str()); + d.detail.swap(details); + } + } + if (next != leader_checks) { changed = true; leader_checks = next; diff --git a/src/msg/msg_types.h b/src/msg/msg_types.h index 1049ad540a0..8ae0bd52806 100644 --- a/src/msg/msg_types.h +++ b/src/msg/msg_types.h @@ -598,6 +598,14 @@ struct entity_addrvec_t { } return entity_addr_t(); } + bool has_msgr2() const { + for (auto& a : v) { + if (a.is_msgr2()) { + return true; + } + } + return false; + } bool parse(const char *s, const char **end = 0); From 9459c6118abe9370955ac362c8565c1b232e4293 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 15 Jan 2019 10:24:10 -0600 Subject: [PATCH 06/11] msg/async/AsyncMessenger: drop single-use _send_to Signed-off-by: Sage Weil --- src/msg/async/AsyncMessenger.cc | 4 +++- src/msg/async/AsyncMessenger.h | 7 +------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc index d310e0cd7cc..8d8ed05107c 100644 --- a/src/msg/async/AsyncMessenger.cc +++ b/src/msg/async/AsyncMessenger.cc @@ -627,8 +627,10 @@ ConnectionRef AsyncMessenger::get_loopback_connection() return local_connection; } -int AsyncMessenger::_send_to(Message *m, int type, const entity_addrvec_t& addrs) +int AsyncMessenger::send_to(Message *m, int type, const entity_addrvec_t& addrs) { + Mutex::Locker l(lock); + FUNCTRACE(cct); ceph_assert(m); diff --git a/src/msg/async/AsyncMessenger.h b/src/msg/async/AsyncMessenger.h index b12dce625fd..2a91e0cf401 100644 --- a/src/msg/async/AsyncMessenger.h +++ b/src/msg/async/AsyncMessenger.h @@ -137,11 +137,7 @@ public: * @defgroup Messaging * @{ */ - int send_to(Message *m, int type, const entity_addrvec_t& addrs) override { - 
Mutex::Locker l(lock); - - return _send_to(m, type, addrs); - } + int send_to(Message *m, int type, const entity_addrvec_t& addrs) override; /** @} // Messaging */ @@ -216,7 +212,6 @@ private: void submit_message(Message *m, AsyncConnectionRef con, const entity_addrvec_t& dest_addrs, int dest_type); - int _send_to(Message *m, int type, const entity_addrvec_t& addrs); void _finish_bind(const entity_addrvec_t& bind_addrs, const entity_addrvec_t& listen_addrs); From cb5ada19702d387724f4fb93e0f7763a0d3bb0ae Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 15 Jan 2019 10:38:29 -0600 Subject: [PATCH 07/11] msg/async: use v1 for v1 <-> [v2,v1] peers If *peers* are communicating, i.e. there may be bidirectional connection attempts, we must use the same protocol version from both ends or else we will get very confused. Fix this by forcing the use of v1 when we - are bound to a v1 endpoint only (people can't connect to us via v2) - we are connecting to a *peer* If it is a non-peer, then connections are uni-directional. If we both have v2, we will both use v2. If we ever switch to [v2,v1], it will be as part of a restart. 
Signed-off-by: Sage Weil --- src/msg/async/AsyncMessenger.cc | 74 +++++++++++++++++++++++---------- src/msg/async/AsyncMessenger.h | 3 ++ 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc index 8d8ed05107c..7f34f886b83 100644 --- a/src/msg/async/AsyncMessenger.cc +++ b/src/msg/async/AsyncMessenger.cc @@ -601,32 +601,38 @@ AsyncConnectionRef AsyncMessenger::create_connect( return conn; } -ConnectionRef AsyncMessenger::connect_to(int type, const entity_addrvec_t& addrs) -{ - Mutex::Locker l(lock); - if (*my_addrs == addrs || - (addrs.v.size() == 1 && - my_addrs->contains(addrs.front()))) { - // local - return local_connection; - } - - AsyncConnectionRef conn = _lookup_conn(addrs); - if (conn) { - ldout(cct, 10) << __func__ << " " << addrs << " existing " << conn << dendl; - } else { - conn = create_connect(addrs, type); - ldout(cct, 10) << __func__ << " " << addrs << " new " << conn << dendl; - } - - return conn; -} ConnectionRef AsyncMessenger::get_loopback_connection() { return local_connection; } +entity_addrvec_t AsyncMessenger::_filter_addrs(int type, + const entity_addrvec_t& addrs) +{ + if (did_bind && + !get_myaddrs().has_msgr2() && + get_mytype() == type) { + // if we are bound to v1 only, and we are connecting to a peer, we cannot + // use the peer's v2 address (yet). otherwise the connection is asymmetrical, + // because they would have to use v1 to connect to us, and we would use v2, + // and connection race detection etc would totally break down (among other + // things). 
+ ldout(cct, 10) << __func__ << " " << addrs << " type " << type + << " limiting to v1 ()" << dendl; + entity_addrvec_t r; + for (auto& i : addrs.v) { + if (i.is_msgr2()) { + continue; + } + r.v.push_back(i); + } + return r; + } else { + return addrs; + } +} + int AsyncMessenger::send_to(Message *m, int type, const entity_addrvec_t& addrs) { Mutex::Locker l(lock); @@ -650,11 +656,35 @@ int AsyncMessenger::send_to(Message *m, int type, const entity_addrvec_t& addrs) return -EINVAL; } - AsyncConnectionRef conn = _lookup_conn(addrs); - submit_message(m, conn, addrs, type); + auto av = _filter_addrs(type, addrs); + AsyncConnectionRef conn = _lookup_conn(av); + submit_message(m, conn, av, type); return 0; } +ConnectionRef AsyncMessenger::connect_to(int type, const entity_addrvec_t& addrs) +{ + Mutex::Locker l(lock); + if (*my_addrs == addrs || + (addrs.v.size() == 1 && + my_addrs->contains(addrs.front()))) { + // local + return local_connection; + } + + auto av = _filter_addrs(type, addrs); + + AsyncConnectionRef conn = _lookup_conn(av); + if (conn) { + ldout(cct, 10) << __func__ << " " << av << " existing " << conn << dendl; + } else { + conn = create_connect(av, type); + ldout(cct, 10) << __func__ << " " << av << " new " << conn << dendl; + } + + return conn; +} + void AsyncMessenger::submit_message(Message *m, AsyncConnectionRef con, const entity_addrvec_t& dest_addrs, int dest_type) diff --git a/src/msg/async/AsyncMessenger.h b/src/msg/async/AsyncMessenger.h index 2a91e0cf401..666187a720d 100644 --- a/src/msg/async/AsyncMessenger.h +++ b/src/msg/async/AsyncMessenger.h @@ -215,6 +215,9 @@ private: void _finish_bind(const entity_addrvec_t& bind_addrs, const entity_addrvec_t& listen_addrs); + entity_addrvec_t _filter_addrs(int type, + const entity_addrvec_t& addrs); + private: static const uint64_t ReapDeadConnectionThreshold = 5; From 0c2f792aa7cc01c7e77132a62663f02cf5d746e4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 15 Jan 2019 08:53:31 -0600 Subject: [PATCH 
08/11] mon: clean up probe debug output a bit Signed-off-by: Sage Weil --- src/mon/Monitor.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 880b39541e0..793e037bc3d 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1829,7 +1829,8 @@ void Monitor::handle_probe_probe(MonOpRequestRef op) void Monitor::handle_probe_reply(MonOpRequestRef op) { MMonProbe *m = static_cast(op->get_req()); - dout(10) << "handle_probe_reply " << m->get_source_inst() << *m << dendl; + dout(10) << "handle_probe_reply " << m->get_source_inst() + << " " << *m << dendl; dout(10) << " monmap is " << *monmap << dendl; // discover name and addrs during probing or electing states. @@ -1870,8 +1871,10 @@ void Monitor::handle_probe_reply(MonOpRequestRef op) bootstrap(); return; } - } else { + } else if (peer_name.size()) { dout(10) << " peer name is " << peer_name << dendl; + } else { + dout(10) << " peer " << m->get_source_addr() << " not in map" << dendl; } // new initial peer? From b7273450df6dfca08e47694d9b74837a5753f58d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 15 Jan 2019 10:42:18 -0600 Subject: [PATCH 09/11] mon: make bootstrap rank check more robust If we search for myaddrs, then we will detect our rank properly if we were [a,b] and the a addr got removed. 
Signed-off-by: Sage Weil --- src/mon/MonMap.h | 15 +++++++++++---- src/mon/Monitor.cc | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/mon/MonMap.h b/src/mon/MonMap.h index 5a852807ddb..f5a230c7129 100644 --- a/src/mon/MonMap.h +++ b/src/mon/MonMap.h @@ -327,10 +327,17 @@ public: } int get_rank(const entity_addr_t& a) const { string n = get_name(a); - if (n.empty()) - return -1; - - return get_rank(n); + if (!n.empty()) { + return get_rank(n); + } + return -1; + } + int get_rank(const entity_addrvec_t& av) const { + string n = get_name(av); + if (!n.empty()) { + return get_rank(n); + } + return -1; } bool get_addr_name(const entity_addr_t& a, string& name) { if (addr_mons.count(a) == 0) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 793e037bc3d..71a0b0e5b10 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1086,7 +1086,7 @@ void Monitor::bootstrap() dout(10) << "monmap " << *monmap << dendl; // note my rank - int newrank = monmap->get_rank(messenger->get_myaddr()); + int newrank = monmap->get_rank(messenger->get_myaddrs()); if (newrank < 0 && rank >= 0) { // was i ever part of the quorum? 
if (has_ever_joined) { From f7b6a4b44093bbed8e6cc662758aff5c24f00452 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 16 Jan 2019 21:42:31 -0600 Subject: [PATCH 10/11] qa/rados/thrash-old-clients: avoid msgr2 Signed-off-by: Sage Weil --- qa/suites/rados/thrash-old-clients/ceph.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qa/suites/rados/thrash-old-clients/ceph.yaml b/qa/suites/rados/thrash-old-clients/ceph.yaml index b0c28783375..364f9d03b06 100644 --- a/qa/suites/rados/thrash-old-clients/ceph.yaml +++ b/qa/suites/rados/thrash-old-clients/ceph.yaml @@ -1,5 +1,7 @@ tasks: - ceph: mon_bind_addrvec: false - log-whitelist: - - \(MON_MSGR2_NOT_ENABLED\) + mon_bind_msgr2: false + conf: + global: + ms bind msgr2: false From 7a89787cde24fafd7efc27822742b401f8f21c39 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 16 Jan 2019 17:27:37 -0600 Subject: [PATCH 11/11] qa/suites/rados/upgrade: one mon per node, and enable-msgr2 at end Signed-off-by: Sage Weil --- .../luminous-x-singleton/0-cluster/start.yaml | 15 ++++++++++----- .../2-partial-upgrade/firsthalf.yaml | 9 +++++---- .../luminous-x-singleton/6-finish-upgrade.yaml | 9 ++++++--- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml b/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml index 308e1c60b23..3c65ea9ccc5 100644 --- a/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml +++ b/qa/suites/rados/upgrade/luminous-x-singleton/0-cluster/start.yaml @@ -7,26 +7,31 @@ overrides: ceph: mon_bind_addrvec: false mon_bind_msgr2: false - log-whitelist: - - \(MON_MSGR2_NOT_ENABLED\) fs: xfs conf: global: ms dump corrupt message level: 0 + ms bind msgr2: false mds: debug ms: 1 debug mds: 20 roles: - - mon.a - - mon.c - mgr.x - - mgr.y - mds.a - osd.0 - osd.1 - osd.2 -- - mon.b - osd.3 +- - mon.b - osd.4 - osd.5 + - osd.6 + - osd.7 +- - mon.c + - mgr.y + - osd.8 + - osd.9 + - osd.10 + - 
osd.11 - - client.0 diff --git a/qa/suites/rados/upgrade/luminous-x-singleton/2-partial-upgrade/firsthalf.yaml b/qa/suites/rados/upgrade/luminous-x-singleton/2-partial-upgrade/firsthalf.yaml index 51f32ccc13d..a6cf4a46094 100644 --- a/qa/suites/rados/upgrade/luminous-x-singleton/2-partial-upgrade/firsthalf.yaml +++ b/qa/suites/rados/upgrade/luminous-x-singleton/2-partial-upgrade/firsthalf.yaml @@ -5,13 +5,14 @@ meta: restart : osd.0,1,2,3,4,5 tasks: - install.upgrade: - osd.0: + mon.a: + mon.b: - print: "**** done install.upgrade osd.0" - ceph.restart: - daemons: [mon.a, mon.c] + daemons: [mon.a, mon.b] wait-for-healthy: false mon-health-to-clog: false - ceph.restart: - daemons: [osd.0, osd.1, osd.2] + daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6, osd.7] wait-for-healthy: false -- print: "**** done ceph.restart 1st half" +- print: "**** done ceph.restart 1st 2/3s" diff --git a/qa/suites/rados/upgrade/luminous-x-singleton/6-finish-upgrade.yaml b/qa/suites/rados/upgrade/luminous-x-singleton/6-finish-upgrade.yaml index bd917f24f90..e7fa4b2f47c 100644 --- a/qa/suites/rados/upgrade/luminous-x-singleton/6-finish-upgrade.yaml +++ b/qa/suites/rados/upgrade/luminous-x-singleton/6-finish-upgrade.yaml @@ -10,18 +10,21 @@ overrides: - \(MDS_ tasks: - install.upgrade: - osd.3: + mon.c: - ceph.restart: - daemons: [mon.b, mgr.x, mgr.y] + daemons: [mon.c, mgr.x, mgr.y] wait-for-up: true wait-for-healthy: false - ceph.restart: - daemons: [osd.3, osd.4, osd.5] + daemons: [osd.8, osd.9, osd.10, osd.11] wait-for-up: true wait-for-healthy: false - ceph.restart: daemons: [mds.a] wait-for-up: true wait-for-healthy: false +- exec: + mon.a: + - ceph mon enable-msgr2 - install.upgrade: client.0: