mirror of
https://github.com/ceph/ceph
synced 2025-02-23 19:17:37 +00:00
Merge PR #38693 into master
* refs/pull/38693/head:
    qa: execute scrubs only on rank 0
    client: print debug information about resolved MDS
    qa: let Client::resolve_mds lookup the rank

Reviewed-by: Xiubo Li <xiubli@redhat.com>
This commit is contained in:
commit
ea61b6a10f
@ -1080,8 +1080,7 @@ class Filesystem(MDSCluster):
|
||||
return self.json_asok(command, 'mds', info['name'], timeout=timeout)
|
||||
|
||||
def rank_tell(self, command, rank=0, status=None):
    """
    Send a ``tell`` command to the MDS holding ``rank`` and decode its reply.

    :param command: list of command words appended after the target,
                    e.g. ``["scrub", "status"]``
    :param rank: MDS rank to address (defaults to rank 0)
    :param status: unused; kept so existing callers that pass it still work
    :return: the command output parsed from JSON
    """
    # Address the daemon by role ("mds.<id>:<rank>") instead of looking up
    # the daemon name first, so the receiving side resolves the rank itself.
    # NOTE(review): self.id is presumably the file system id -- confirm.
    target = f"mds.{self.id}:{rank}"
    reply = self.mon_manager.raw_cluster_cmd("tell", target, *command)
    return json.loads(reply)
|
||||
|
||||
def ranks_tell(self, command, status=None):
|
||||
if status is None:
|
||||
|
@ -3,9 +3,8 @@ Thrash mds by simulating failures
|
||||
"""
|
||||
import logging
|
||||
import contextlib
|
||||
import threading
|
||||
|
||||
from gevent import sleep, killall, joinall, GreenletExit
|
||||
from gevent import sleep, GreenletExit
|
||||
from gevent.greenlet import Greenlet
|
||||
from gevent.event import Event
|
||||
from teuthology import misc as teuthology
|
||||
@ -18,14 +17,23 @@ from tasks.thrasher import Thrasher
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class ForwardScrubber(Thrasher, Greenlet):
|
||||
"""
|
||||
ForwardScrubber::
|
||||
|
||||
The ForwardScrubber does forward scrubbing of file-systems during execution
|
||||
of other tasks (workunits, etc).
|
||||
"""
|
||||
|
||||
def __init__(self, fs, scrub_timeout=300, sleep_between_iterations=1):
|
||||
super(ForwardScrubber, self).__init__()
|
||||
|
||||
class MDSRankScrubber(Thrasher, Greenlet):
|
||||
def __init__(self, fs, mds_rank, scrub_timeout=300):
|
||||
super(MDSRankScrubber, self).__init__()
|
||||
self.logger = log.getChild('fs.[{f}]'.format(f=fs.name))
|
||||
self.fs = fs
|
||||
self.mds_rank = mds_rank
|
||||
self.name = 'thrasher.fs.[{f}]'.format(f=fs.name)
|
||||
self.stopping = Event()
|
||||
self.scrub_timeout = scrub_timeout
|
||||
self.sleep_between_iterations = sleep_between_iterations
|
||||
|
||||
def _run(self):
|
||||
try:
|
||||
@ -35,10 +43,28 @@ class MDSRankScrubber(Thrasher, Greenlet):
|
||||
self.logger.exception("exception:")
|
||||
# allow successful completion so gevent doesn't see an exception...
|
||||
|
||||
def do_scrub(self, path="/", recursive=True):
|
||||
def stop(self):
    """
    Ask the scrub loop to finish.

    This only sets the ``stopping`` event; it does not wait for anything.
    """
    stop_event = self.stopping
    stop_event.set()
|
||||
|
||||
def do_scrub(self):
    """
    Scrub the file system over and over until ``stopping`` is set.

    Each pass calls ``_scrub()`` and then sleeps for
    ``sleep_between_iterations``. A ``GreenletExit`` raised while looping
    is swallowed, so the closing log line is still written.
    """
    self.logger.info(f'start scrubbing fs: {self.fs.name}')

    try:
        while True:
            if self.stopping.is_set():
                break
            self._scrub()
            sleep(self.sleep_between_iterations)
    except GreenletExit:
        # Being killed mid-iteration is treated as a normal shutdown.
        pass

    self.logger.info(f'end scrubbing fs: {self.fs.name}')
|
||||
|
||||
def _scrub(self, path="/", recursive=True):
|
||||
self.logger.info(f"scrubbing fs: {self.fs.name}")
|
||||
recopt = ["recursive", "force"] if recursive else ["force"]
|
||||
out_json = self.fs.rank_tell(["scrub", "start", path] + recopt,
|
||||
rank=self.mds_rank)
|
||||
out_json = self.fs.rank_tell(["scrub", "start", path] + recopt)
|
||||
assert out_json is not None
|
||||
|
||||
tag = out_json['scrub_tag']
|
||||
@ -47,15 +73,14 @@ class MDSRankScrubber(Thrasher, Greenlet):
|
||||
assert out_json['return_code'] == 0
|
||||
assert out_json['mode'] == 'asynchronous'
|
||||
|
||||
self.wait_until_scrub_complete(tag)
|
||||
return self._wait_until_scrub_complete(tag)
|
||||
|
||||
def wait_until_scrub_complete(self, tag):
|
||||
def _wait_until_scrub_complete(self, tag):
|
||||
# time out after scrub_timeout seconds and assume as done
|
||||
with contextutil.safe_while(sleep=30, tries=self.scrub_timeout//30) as proceed:
|
||||
while proceed():
|
||||
try:
|
||||
out_json = self.fs.rank_tell(["scrub", "status"],
|
||||
rank=self.mds_rank)
|
||||
out_json = self.fs.rank_tell(["scrub", "status"])
|
||||
assert out_json is not None
|
||||
if out_json['status'] == "no active scrubs running":
|
||||
self.logger.info("all active scrubs completed")
|
||||
@ -72,96 +97,14 @@ class MDSRankScrubber(Thrasher, Greenlet):
|
||||
self.logger.info("retrying scrub status command in a while")
|
||||
pass
|
||||
|
||||
self.logger.info("timed out waiting for scrub to complete")
|
||||
|
||||
|
||||
class ForwardScrubber(Thrasher, Greenlet):
    """
    ForwardScrubber::

    The ForwardScrubber does forward scrubbing of file-systems during execution
    of other tasks (workunits, etc).

    On every iteration it starts one MDSRankScrubber per rank reported by
    ``fs.get_all_mds_rank()``, waits for all of them, and raises if any of
    them recorded an exception.
    """

    def __init__(self, fs, scrub_timeout=300, sleep_between_iterations=1):
        """
        :param fs: file system object to scrub
        :param scrub_timeout: timeout handed to each MDSRankScrubber
        :param sleep_between_iterations: pause between scrub rounds, passed
                                         to gevent ``sleep``
        """
        super(ForwardScrubber, self).__init__()

        self.logger = log.getChild('fs.[{f}]'.format(f=fs.name))
        self.fs = fs
        self.name = 'thrasher.fs.[{f}]'.format(f=fs.name)
        self.stopping = Event()
        # guards self.scrubbers, which stop() and do_scrub() both touch
        self.lock = threading.Lock()
        self.scrubbers = []
        self.scrub_timeout = scrub_timeout
        self.sleep_between_iterations = sleep_between_iterations

    def _run(self):
        """Greenlet entry point: run do_scrub() and record any failure."""
        try:
            self.do_scrub()
        except Exception as e:
            self.set_thrasher_exception(e)
            self.logger.exception("exception:")
            # allow successful completion so gevent doesn't see an exception...

    def log(self, x):
        """Write data to the logger assigned to ForwardScrubber"""
        self.logger.info(x)

    def stop(self):
        """Signal the scrub loop to stop and kill the per-rank scrubbers."""
        self.stopping.set()
        with self.lock:
            self.log("killing all scrubbers")
            killall(self.scrubbers)

    def do_scrub(self):
        """
        Perform the file-system scrubbing

        :raises RuntimeError: if any per-rank scrubber recorded an exception
        """
        self.log(f'starting do_scrub for fs: {self.fs.name}')

        try:
            while not self.stopping.is_set():
                ranks = self.fs.get_all_mds_rank()

                for r in ranks:
                    scrubber = MDSRankScrubber(self.fs, r, self.scrub_timeout)
                    # register before starting so stop() can see it
                    with self.lock:
                        self.scrubbers.append(scrubber)
                    scrubber.start()

                # wait for all scrubbers to complete
                self.log("joining all scrubbers")
                joinall(self.scrubbers)

                if any(s.exception is not None for s in self.scrubbers):
                    raise RuntimeError('error during scrub thrashing')

                with self.lock:
                    self.scrubbers.clear()

                sleep(self.sleep_between_iterations)
        except GreenletExit:
            # killed while scrubbing: treated as a normal shutdown
            pass
|
||||
|
||||
|
||||
def stop_all_fwd_scrubbers(thrashers):
    """
    Stop every ForwardScrubber found in ``thrashers``.

    Entries that are not ForwardScrubber instances are ignored. Each
    scrubber is stopped and joined in turn.

    :param thrashers: iterable of thrasher objects
    :raises RuntimeError: if a scrubber has a recorded exception after it
                          has been joined
    """
    for thrasher in thrashers:
        if not isinstance(thrasher, ForwardScrubber):
            continue
        thrasher.stop()
        # join once, before looking at .exception, so the scrubber has
        # finished running by the time its result is checked
        thrasher.join()
        if thrasher.exception is not None:
            raise RuntimeError(f"error during scrub thrashing: {thrasher.exception}")
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
|
@ -5794,14 +5794,14 @@ int Client::resolve_mds(
|
||||
ceph_assert(targets != nullptr);
|
||||
|
||||
mds_role_t role;
|
||||
std::stringstream ss;
|
||||
int role_r = fsmap->parse_role(mds_spec, &role, ss);
|
||||
CachedStackStringStream css;
|
||||
int role_r = fsmap->parse_role(mds_spec, &role, *css);
|
||||
if (role_r == 0) {
|
||||
// We got a role, resolve it to a GID
|
||||
ldout(cct, 10) << __func__ << ": resolved '" << mds_spec << "' to role '"
|
||||
<< role << "'" << dendl;
|
||||
targets->push_back(
|
||||
fsmap->get_filesystem(role.fscid)->mds_map.get_info(role.rank).global_id);
|
||||
auto& info = fsmap->get_filesystem(role.fscid)->mds_map.get_info(role.rank);
|
||||
ldout(cct, 10) << __func__ << ": resolved " << mds_spec << " to role '"
|
||||
<< role << "' aka " << info.human_name() << dendl;
|
||||
targets->push_back(info.global_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -5811,42 +5811,48 @@ int Client::resolve_mds(
|
||||
// It is a possible GID
|
||||
const mds_gid_t mds_gid = mds_gid_t(rank_or_gid);
|
||||
if (fsmap->gid_exists(mds_gid)) {
|
||||
ldout(cct, 10) << __func__ << ": validated GID " << mds_gid << dendl;
|
||||
auto& info = fsmap->get_info_gid(mds_gid);
|
||||
ldout(cct, 10) << __func__ << ": validated gid " << mds_gid << " aka "
|
||||
<< info.human_name() << dendl;
|
||||
targets->push_back(mds_gid);
|
||||
return 0;
|
||||
} else {
|
||||
lderr(cct) << __func__ << ": GID " << mds_gid << " not in MDS map"
|
||||
lderr(cct) << __func__ << ": gid " << mds_gid << " not in MDS map"
|
||||
<< dendl;
|
||||
lderr(cct) << "FSMap: " << *fsmap << dendl;
|
||||
return -ENOENT;
|
||||
}
|
||||
} else if (mds_spec == "*") {
|
||||
// It is a wildcard: use all MDSs
|
||||
const auto mds_info = fsmap->get_mds_info();
|
||||
const auto& mds_info = fsmap->get_mds_info();
|
||||
|
||||
ldout(cct, 10) << __func__ << ": resolving `*' to all MDS daemons" << dendl;
|
||||
if (mds_info.empty()) {
|
||||
lderr(cct) << __func__ << ": * passed but no MDS daemons found" << dendl;
|
||||
lderr(cct) << __func__ << ": no MDS daemons found" << dendl;
|
||||
lderr(cct) << "FSMap: " << *fsmap << dendl;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
for (const auto& i : mds_info) {
|
||||
targets->push_back(i.first);
|
||||
for (const auto& [gid, info] : mds_info) {
|
||||
ldout(cct, 10) << __func__ << ": appending " << info.human_name() << " to targets" << dendl;
|
||||
targets->push_back(gid);
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
// It did not parse as an integer, it is not a wildcard, it must be a name
|
||||
const mds_gid_t mds_gid = fsmap->find_mds_gid_by_name(mds_spec);
|
||||
if (mds_gid == 0) {
|
||||
lderr(cct) << "MDS ID '" << mds_spec << "' not found" << dendl;
|
||||
|
||||
lderr(cct) << __func__ << ": no MDS daemons found by name `" << mds_spec << "'" << dendl;
|
||||
lderr(cct) << "FSMap: " << *fsmap << dendl;
|
||||
|
||||
return -ENOENT;
|
||||
} else {
|
||||
ldout(cct, 10) << __func__ << ": resolved ID '" << mds_spec
|
||||
<< "' to GID " << mds_gid << dendl;
|
||||
auto& info = fsmap->get_info_gid(mds_gid);
|
||||
ldout(cct, 10) << __func__ << ": resolved name '" << mds_spec
|
||||
<< "' to " << info.human_name() << dendl;
|
||||
targets->push_back(mds_gid);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user