mirror of
https://github.com/ceph/ceph
synced 2025-03-22 10:17:23 +00:00
MDSMonitor: add fs fail command
This command sets the fs as not joinable and fails all ranks. This is a simpler command than the typical sequence: (a) set fs not joinable; (b) iterate through and fail ranks. It also does this in a single FSMap update. Fixes: http://tracker.ceph.com/issues/37085 Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
parent
38a99f04f4
commit
4c49f165ec
@ -260,6 +260,10 @@
|
||||
specified in the ``[global]`` section to allow daemons and clients
|
||||
to discover the monitors.
|
||||
|
||||
* New command `fs fail` has been added to quickly bring down a file
|
||||
system. This is a single command that unsets the joinable flag on the file
|
||||
system and brings down all of its ranks.
|
||||
|
||||
|
||||
Upgrading from Luminous
|
||||
-----------------------
|
||||
|
@ -98,9 +98,17 @@ client I/O is stopped.
|
||||
Taking the cluster down rapidly for deletion or disaster recovery
|
||||
-----------------------------------------------------------------
|
||||
|
||||
To allow rapidly deleting a file system (for testing) or to quickly bring MDS
|
||||
daemons down, the operator may also set a flag to prevent standbys from
|
||||
activating on the file system. This is done using the ``joinable`` flag:
|
||||
To allow rapidly deleting a file system (for testing) or to quickly bring the
|
||||
file system and MDS daemons down, use the ``fs fail`` command:
|
||||
|
||||
::
|
||||
|
||||
fs fail <fs_name>
|
||||
|
||||
This command sets a file system flag to prevent standbys from
|
||||
activating on the file system (the ``joinable`` flag) and then fails all of
the file system's ranks.
|
||||
|
||||
This process can also be done manually by doing the following:
|
||||
|
||||
::
|
||||
|
||||
@ -117,6 +125,12 @@ respawn as standbys. The file system will be left in a degraded state.
|
||||
Once all ranks are inactive, the file system may also be deleted or left in
|
||||
this state for other purposes (perhaps disaster recovery).
|
||||
|
||||
To bring the cluster back up, simply set the joinable flag:
|
||||
|
||||
::
|
||||
|
||||
fs set <fs_name> joinable true
|
||||
|
||||
|
||||
Daemons
|
||||
-------
|
||||
|
@ -76,6 +76,61 @@ class FlagSetHandler : public FileSystemCommandHandler
|
||||
}
|
||||
};
|
||||
|
||||
class FailHandler : public FileSystemCommandHandler
|
||||
{
|
||||
public:
|
||||
FailHandler()
|
||||
: FileSystemCommandHandler("fs fail")
|
||||
{
|
||||
}
|
||||
|
||||
int handle(
|
||||
Monitor* mon,
|
||||
FSMap& fsmap,
|
||||
MonOpRequestRef op,
|
||||
const cmdmap_t& cmdmap,
|
||||
std::stringstream& ss) override
|
||||
{
|
||||
if (!mon->osdmon()->is_writeable()) {
|
||||
// not allowed to write yet, so retry when we can
|
||||
mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
std::string fs_name;
|
||||
if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name) || fs_name.empty()) {
|
||||
ss << "Missing filesystem name";
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
auto fs = fsmap.get_filesystem(fs_name);
|
||||
if (fs == nullptr) {
|
||||
ss << "Not found: '" << fs_name << "'";
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
auto f = [](auto fs) {
|
||||
fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
|
||||
};
|
||||
fsmap.modify_filesystem(fs->fscid, std::move(f));
|
||||
|
||||
std::vector<mds_gid_t> to_fail;
|
||||
for (const auto& p : fs->mds_map.get_mds_info()) {
|
||||
to_fail.push_back(p.first);
|
||||
}
|
||||
|
||||
for (const auto& gid : to_fail) {
|
||||
mon->mdsmon()->fail_mds_gid(fsmap, gid);
|
||||
}
|
||||
mon->osdmon()->propose_pending();
|
||||
|
||||
ss << fs_name;
|
||||
ss << " marked not joinable; MDS cannot join the cluster. All MDS ranks marked failed.";
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
class FsNewHandler : public FileSystemCommandHandler
|
||||
{
|
||||
public:
|
||||
@ -691,7 +746,7 @@ class RemoveFilesystemHandler : public FileSystemCommandHandler
|
||||
|
||||
// Check that no MDS daemons are active
|
||||
if (fs->mds_map.get_num_up_mds() > 0) {
|
||||
ss << "all MDS daemons must be inactive before removing filesystem";
|
||||
ss << "all MDS daemons must be inactive/failed before removing filesystem. See `ceph fs fail`.";
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -878,6 +933,7 @@ FileSystemCommandHandler::load(Paxos *paxos)
|
||||
std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
|
||||
|
||||
handlers.push_back(std::make_shared<SetHandler>());
|
||||
handlers.push_back(std::make_shared<FailHandler>());
|
||||
handlers.push_back(std::make_shared<FlagSetHandler>());
|
||||
handlers.push_back(std::make_shared<AddDataPoolHandler>(paxos));
|
||||
handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
|
||||
|
@ -371,6 +371,10 @@ COMMAND("fs new " \
|
||||
"name=allow_dangerous_metadata_overlay,type=CephBool,req=false", \
|
||||
"make new filesystem using named pools <metadata> and <data>", \
|
||||
"fs", "rw")
|
||||
COMMAND("fs fail " \
|
||||
"name=fs_name,type=CephString ", \
|
||||
"bring the file system down and all of its ranks", \
|
||||
"fs", "rw")
|
||||
COMMAND("fs rm " \
|
||||
"name=fs_name,type=CephString " \
|
||||
"name=yes_i_really_mean_it,type=CephBool,req=false", \
|
||||
|
Loading…
Reference in New Issue
Block a user