mirror of
https://github.com/ceph/ceph
synced 2025-04-01 14:51:13 +00:00
mon/MDSMonitor: add 'mds ok-to-stop' command
Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
parent
cfba0acc01
commit
aa33a26e32
@ -574,6 +574,28 @@ function run_mgr() {
|
||||
"$@" || return 1
|
||||
}
|
||||
|
||||
# Start a ceph-mds daemon for a test cluster.
#
#   $1    directory that receives the daemon's data dir, log, pid file and
#         run dir
#   $2    id handed to ceph-mds via --id
#   $3..  extra arguments appended verbatim to the ceph-mds command line
#
# $EXTRA_OPTS is expanded unquoted on purpose so it can carry several options.
# Returns 1 when ceph-mds exits with a non-zero status.
function run_mds() {
    local dir=$1
    shift
    local id=$1
    shift
    local mds_data=$dir/$id

    if ! ceph-mds \
        --id $id \
        $EXTRA_OPTS \
        --debug-mds 20 \
        --debug-objecter 20 \
        --debug-ms 20 \
        --chdir= \
        --mds-data=$mds_data \
        --log-file=$dir/\$name.log \
        --admin-socket=$(get_asok_path) \
        --run-dir=$dir \
        --pid-file=$dir/\$name.pid \
        "$@"
    then
        return 1
    fi
}
|
||||
|
||||
#######################################################################
|
||||
|
||||
##
|
||||
|
@ -204,4 +204,38 @@ function TEST_5_mons_checks() {
|
||||
ceph mon ok-to-rm e || return 1
|
||||
}
|
||||
|
||||
# Exercise 'ceph mds ok-to-stop' on a cluster with one filesystem: first with
# a single MDS (a), then again after a second MDS (b) has been started.
#
#   $1    test directory passed on to the run_* helpers
#
# Returns 1 as soon as any step gives an unexpected result.
function TEST_0_mds() {
    local dir=$1
    local pool

    CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "

    run_mon $dir a --public-addr=$CEPH_MON_A || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_mds $dir a || return 1

    for pool in meta data ; do
        ceph osd pool create $pool 1 || return 1
    done
    ceph fs new myfs meta data || return 1
    # presumably gives mds.a time to be picked up by the new filesystem
    sleep 5

    # Only mds.a is running: any request that includes it must be refused,
    # while a request naming only an unknown id must succeed.
    if ceph mds ok-to-stop a ; then
        return 1
    fi
    if ceph mds ok-to-stop a dne ; then
        return 1
    fi
    ceph mds ok-to-stop dne || return 1

    run_mds $dir b || return 1
    # presumably gives mds.b time to register
    sleep 5

    # With two daemons either one may be stopped, but never both together;
    # unknown ids mixed into the request must not change the verdict.
    ceph mds ok-to-stop a || return 1
    ceph mds ok-to-stop b || return 1
    if ceph mds ok-to-stop a b ; then
        return 1
    fi
    ceph mds ok-to-stop a dne1 dne2 || return 1
    ceph mds ok-to-stop b dne || return 1
    if ceph mds ok-to-stop a b dne ; then
        return 1
    fi
    ceph mds ok-to-stop dne1 dne2 || return 1

    kill_daemons $dir KILL mds.a
}
|
||||
|
||||
|
||||
main ok-to-stop "$@"
|
||||
|
@ -65,15 +65,27 @@ public:
|
||||
* STANDBY_REPLAY for the gid `who`
|
||||
*/
|
||||
bool has_standby_replay(mds_gid_t who) const
{
  // Delegate the search; any result other than MDS_GID_NONE means a
  // standby-replay daemon was found.
  const mds_gid_t follower = get_standby_replay(who);
  return follower != MDS_GID_NONE;
}
|
||||
/**
 * Look up the standby-replay daemon that shares a rank with `who`.
 *
 * Walks this filesystem's mds_info and picks the first daemon whose state is
 * STATE_STANDBY_REPLAY and whose rank equals the rank recorded for `who`.
 *
 * @param who gid whose rank is compared; it is fetched with mds_info.at(who)
 *            only once a standby-replay entry is encountered
 *            (NOTE(review): presumably throws for an unknown gid in that
 *            case -- confirm against the container type of mds_info)
 * @return global_id of the matching standby-replay daemon, or MDS_GID_NONE
 *         when no daemon matches
 */
mds_gid_t get_standby_replay(mds_gid_t who) const
{
  for (const auto &i : mds_map.mds_info) {
    const auto &info = i.second;
    if (info.state == MDSMap::STATE_STANDBY_REPLAY
        && info.rank == mds_map.mds_info.at(who).rank) {
      // Hand back the follower's own gid so callers can identify it.
      return info.global_id;
    }
  }

  return MDS_GID_NONE;
}
|
||||
/**
 * @return true when `who` has an entry in this filesystem's mds_info and
 *         that entry is in STATE_STANDBY_REPLAY; false otherwise, including
 *         when `who` has no entry at all
 */
bool is_standby_replay(mds_gid_t who) const
{
  const auto entry = mds_map.mds_info.find(who);
  if (entry == mds_map.mds_info.end()) {
    return false;
  }
  return entry->second.state == MDSMap::STATE_STANDBY_REPLAY;
}
|
||||
|
||||
@ -173,6 +185,19 @@ public:
|
||||
return legacy_client_fscid;
|
||||
}
|
||||
|
||||
size_t get_num_standby() const {
|
||||
return standby_daemons.size();
|
||||
}
|
||||
|
||||
bool is_any_degraded() const {
|
||||
for (auto& i : filesystems) {
|
||||
if (i.second->mds_map.is_degraded()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get state of all daemons (for all filesystems, including all standbys)
|
||||
*/
|
||||
@ -370,6 +395,16 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Forward the standby-replay query for `who` to the filesystem it is mapped
 * to in mds_roles.  Both lookups go through at().
 */
bool is_standby_replay(mds_gid_t who) const
{
  const auto &fscid = mds_roles.at(who);
  const auto &fs = filesystems.at(fscid);
  return fs->is_standby_replay(who);
}
|
||||
|
||||
/**
 * Forward the standby-replay lookup for `who` to the filesystem it is mapped
 * to in mds_roles, and return whatever that filesystem reports.  Both
 * lookups go through at().
 */
mds_gid_t get_standby_replay(mds_gid_t who) const
{
  const auto &fscid = mds_roles.at(who);
  const auto &fs = filesystems.at(fscid);
  return fs->get_standby_replay(who);
}
|
||||
|
||||
/**
|
||||
* A daemon has told us it's compat, and it's too new
|
||||
* for the one we had previously. Impose the new one
|
||||
|
@ -897,6 +897,57 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op)
|
||||
ds << fsmap;
|
||||
}
|
||||
r = 0;
|
||||
} else if (prefix == "mds ok-to-stop") {
|
||||
vector<string> ids;
|
||||
if (!cmd_getval(g_ceph_context, cmdmap, "ids", ids)) {
|
||||
r = -EINVAL;
|
||||
ss << "must specify mds id";
|
||||
goto out;
|
||||
}
|
||||
if (fsmap.is_any_degraded()) {
|
||||
ss << "one or more filesystems is currently degraded";
|
||||
r = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
set<mds_gid_t> stopping;
|
||||
for (auto& id : ids) {
|
||||
ostringstream ess;
|
||||
mds_gid_t gid = gid_from_arg(fsmap, id, ess);
|
||||
if (gid == MDS_GID_NONE) {
|
||||
// the mds doesn't exist, but no file systems are unhappy, so losing it
|
||||
// can't have any effect.
|
||||
continue;
|
||||
}
|
||||
stopping.insert(gid);
|
||||
}
|
||||
set<mds_gid_t> active;
|
||||
set<mds_gid_t> standby;
|
||||
for (auto gid : stopping) {
|
||||
if (fsmap.gid_has_rank(gid)) {
|
||||
// ignore standby-replay daemons (at this level)
|
||||
if (!fsmap.is_standby_replay(gid)) {
|
||||
auto standby = fsmap.get_standby_replay(gid);
|
||||
if (standby == MDS_GID_NONE ||
|
||||
stopping.count(standby)) {
|
||||
// no standby-replay, or we're also stopping the standby-replay
|
||||
// for this mds
|
||||
active.insert(gid);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// net loss of a standby
|
||||
standby.insert(gid);
|
||||
}
|
||||
}
|
||||
if (fsmap.get_num_standby() - standby.size() < active.size()) {
|
||||
r = -EBUSY;
|
||||
ss << "insufficent standby MDS daemons to stop active gids "
|
||||
<< stringify(active)
|
||||
<< " and/or standby gids " << stringify(standby);;
|
||||
goto out;
|
||||
}
|
||||
r = 0;
|
||||
ss << "should be safe to stop " << ids;
|
||||
} else if (prefix == "fs dump") {
|
||||
int64_t epocharg;
|
||||
epoch_t epoch;
|
||||
|
@ -325,6 +325,9 @@ COMMAND_WITH_FLAG("mds stop name=role,type=CephString", "stop mds", \
|
||||
/* 'mds deactivate': flagged OBSOLETE. */
COMMAND_WITH_FLAG(
    "mds deactivate name=role,type=CephString",
    "clean up specified MDS rank (use with `set max_mds` to shrink cluster)",
    "mds",
    "rw",
    FLAG(OBSOLETE))
|
||||
/* 'mds ok-to-stop': takes its MDS names through the "ids" argument. */
COMMAND(
    "mds ok-to-stop name=ids,type=CephString,n=N",
    "check whether stopping the specified MDS would reduce immediate availability",
    "mds",
    "r")
|
||||
/* 'mds set_max_mds': flagged OBSOLETE. */
COMMAND_WITH_FLAG(
    "mds set_max_mds "
    "name=maxmds,type=CephInt,range=0",
    "set max MDS index",
    "mds",
    "rw",
    FLAG(OBSOLETE))
|
||||
|
Loading…
Reference in New Issue
Block a user