mirror of
https://github.com/ceph/ceph
synced 2025-02-19 08:57:27 +00:00
Merge pull request #47079 from neesingh-rh/fix_56483
mgr/stats: missing clients in perf stats command output. Reviewed-by: Venky Shankar <vshankar@redhat.com> Reviewed-by: Jos Collin <jcollin@redhat.com>
This commit is contained in:
commit
49e66ed0c6
@ -511,3 +511,79 @@ class TestMDSMetrics(CephFSTestCase):
|
||||
if not (client_metadata[i]['valid_metrics']):
|
||||
raise RuntimeError("valid_metrics not found!")
|
||||
|
||||
def test_perf_stats_stale_metrics_with_multiple_filesystem(self):
    """Check that perf stats for a client change after a rank0 MDS failover.

    Two filesystems ("fs2", then "fs1") are created, one client is mounted
    on each and made to do some I/O.  The metrics reported for the client
    of "fs1" are captured, the active MDS of "fs1" is failed, more I/O is
    issued, and the metrics are fetched again.  The test passes only when
    the client still has metadata and its global metrics differ from the
    ones captured before the failover.

    Raises:
        RuntimeError: if fetching the metrics after the failover raises
            MaxWhileTries.
    """
    self.mount_a.umount_wait()
    self.mount_b.umount_wait()

    self.mds_cluster.mon_manager.raw_cluster_cmd(
        "fs", "flag", "set",
        "enable_multiple", "true", "--yes-i-really-mean-it")

    # creating filesystem
    fs_b = self._setup_fs(fs_name="fs2")

    # Mount a client on fs_b
    self.mount_b.mount_wait(cephfs_name=fs_b.name)
    self.mount_b.write_n_mb("test.bin", 1)
    self.mount_b.path_to_ino("test.bin")
    self.mount_b.create_files()

    # creating another filesystem
    fs_a = self._setup_fs(fs_name="fs1")

    # Mount a client on fs_a
    self.mount_a.mount_wait(cephfs_name=fs_a.name)
    self.mount_a.write_n_mb("pad.bin", 1)
    self.mount_a.write_n_mb("test.bin", 2)
    self.mount_a.path_to_ino("test.bin")
    self.mount_a.create_files()

    # validate
    valid, metrics = self._get_metrics(
        self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30)
    log.debug(f"metrics={metrics}")
    self.assertTrue(valid)

    # get mounted client's entries from the global_metrics.
    client_a_name = f'client.{self.mount_a.get_global_id()}'

    global_metrics = metrics['global_metrics']
    # baseline to compare against once the MDS has failed over
    client_a_metrics = global_metrics.get("fs1", {}).get(client_a_name, {})

    # fail active mds of fs_a
    fs_a_mds = fs_a.get_active_names()[0]
    self.mds_cluster.mds_fail(fs_a_mds)
    fs_a.wait_for_state('up:active', rank=0, timeout=30)

    # spread directory per rank
    self._spread_directory_on_all_ranks(fs_a.id)

    # spread some I/O
    self._do_spread_io_all_clients(fs_a.id)

    # wait a bit for mgr to get updated metrics
    time.sleep(5)

    # validate
    try:
        valid, metrics_new = self._get_metrics(
            self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30)
        log.debug(f'metrics={metrics_new}')
        self.assertTrue(valid)

        client_metadata = metrics_new['client_metadata']
        client_a_metadata = client_metadata.get("fs1", {}).get(client_a_name, {})

        global_metrics = metrics_new['global_metrics']
        client_a_metrics_new = global_metrics.get("fs1", {}).get(client_a_name, {})

        # the metrics should be different for the test to succeed.
        self.assertTrue(client_a_metadata and client_a_metrics_new
                        and (client_a_metrics_new != client_a_metrics),
                        "Invalid 'ceph fs perf stats' metrics after"
                        f" rank0 mds of {fs_a.name} failover")
    except MaxWhileTries as err:
        # keep the original exception as the cause so the retry failure
        # remains visible in the traceback
        raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics") from err
    finally:
        # cleanup test directories
        self._cleanup_test_dirs()
|
||||
|
||||
|
@ -195,15 +195,17 @@ class FSPerfStats(object):
|
||||
gid_state = FSPerfStats.get_rank0_mds_gid_state(self.module.get('fs_map'))
|
||||
if not gid_state:
|
||||
return
|
||||
rank0_gid, state = gid_state
|
||||
if (rank0_gid and rank0_gid != self.prev_rank0_gid and state == 'up:active'):
|
||||
#the new rank0 MDS is up:active
|
||||
ua_last_updated = time.monotonic()
|
||||
if (self.rqtimer and self.rqtimer.is_alive()):
|
||||
self.rqtimer.cancel()
|
||||
self.rqtimer = Timer(REREGISTER_TIMER_INTERVAL,
|
||||
self.re_register_queries, args=(rank0_gid, ua_last_updated,))
|
||||
self.rqtimer.start()
|
||||
for value in gid_state:
|
||||
rank0_gid, state = value
|
||||
if (rank0_gid and rank0_gid != self.prev_rank0_gid and state == 'up:active'):
|
||||
#the new rank0 MDS is up:active
|
||||
ua_last_updated = time.monotonic()
|
||||
if (self.rqtimer and self.rqtimer.is_alive()):
|
||||
self.rqtimer.cancel()
|
||||
self.rqtimer = Timer(REREGISTER_TIMER_INTERVAL,
|
||||
self.re_register_queries,
|
||||
args=(rank0_gid, ua_last_updated,))
|
||||
self.rqtimer.start()
|
||||
|
||||
def re_register_queries(self, rank0_gid, ua_last_updated):
|
||||
#reregister queries if the metrics are the latest. Otherwise reschedule the timer and
|
||||
@ -221,12 +223,15 @@ class FSPerfStats(object):
|
||||
|
||||
@staticmethod
def get_rank0_mds_gid_state(fsmap):
    """Return the gid and state of the rank0 MDS of every filesystem.

    Args:
        fsmap: mapping with a 'filesystems' list; each entry carries an
            'mdsmap' (possibly None) whose 'info' dict maps an MDS id to
            a status dict with 'rank', 'gid' and 'state' keys.

    Returns:
        A list of ``[gid, state]`` pairs, one per MDS whose rank is 0, in
        fsmap order.  When no such MDS exists a warning is logged and
        None is returned.
    """
    gid_state = []
    for fs in fsmap['filesystems']:
        mds_map = fs['mdsmap']
        if mds_map is None:
            continue
        for mds_status in mds_map['info'].values():
            if mds_status['rank'] == 0:
                # Collect rather than return here: with several
                # filesystems there is one rank0 MDS per filesystem.
                gid_state.append([mds_status['gid'], mds_status['state']])
    if gid_state:
        return gid_state
    logger.warning("No rank0 mds in the fsmap")
    return None
|
||||
|
||||
def update_client_meta(self):
|
||||
|
Loading…
Reference in New Issue
Block a user