Merge pull request #47079 from neesingh-rh/fix_56483

mgr/stats: missing clients in perf stats command output.

Reviewed-by: Venky Shankar <vshankar@redhat.com>
Reviewed-by: Jos Collin <jcollin@redhat.com>
This commit is contained in:
Venky Shankar 2022-08-24 09:49:47 +05:30 committed by GitHub
commit 49e66ed0c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 91 additions and 10 deletions

View File

@ -511,3 +511,79 @@ class TestMDSMetrics(CephFSTestCase):
if not (client_metadata[i]['valid_metrics']):
raise RuntimeError("valid_metrics not found!")
def test_perf_stats_stale_metrics_with_multiple_filesystem(self):
    """Check `ceph fs perf stats` metrics after a rank0 MDS failover.

    Two filesystems ("fs2" and "fs1") are created with one client mounted
    on each.  Metrics are sampled, the active MDS of "fs1" is failed, more
    I/O is generated, and metrics are sampled again.  The test passes only
    if the "fs1" client still has metadata and its global metrics differ
    from the first sample.
    """
    # start from a clean slate: both clients unmounted
    for mount in (self.mount_a, self.mount_b):
        mount.umount_wait()

    self.mds_cluster.mon_manager.raw_cluster_cmd(
        "fs", "flag", "set", "enable_multiple", "true",
        "--yes-i-really-mean-it")

    # first filesystem, with client b mounted on it
    fs_b = self._setup_fs(fs_name="fs2")
    self.mount_b.mount_wait(cephfs_name=fs_b.name)
    self.mount_b.write_n_mb("test.bin", 1)
    self.mount_b.path_to_ino("test.bin")
    self.mount_b.create_files()

    # second filesystem, with client a mounted on it
    fs_a = self._setup_fs(fs_name="fs1")
    self.mount_a.mount_wait(cephfs_name=fs_a.name)
    self.mount_a.write_n_mb("pad.bin", 1)
    self.mount_a.write_n_mb("test.bin", 2)
    self.mount_a.path_to_ino("test.bin")
    self.mount_a.create_files()

    # baseline sample: one client expected on each filesystem
    verifier = self.verify_mds_metrics(client_count=1,
                                       mul_fs=[fs_a.id, fs_b.id])
    ok, baseline = self._get_metrics(verifier, 30)
    log.debug(f"metrics={baseline}")
    self.assertTrue(ok)

    # remember client a's entry in the global_metrics of "fs1"
    client_a_name = f'client.{self.mount_a.get_global_id()}'
    fs1_global = baseline['global_metrics'].get("fs1", {})
    client_a_metrics = fs1_global.get(client_a_name, {})

    # fail the active mds of fs_a and wait for rank 0 to be active again
    failed_mds = fs_a.get_active_names()[0]
    self.mds_cluster.mds_fail(failed_mds)
    fs_a.wait_for_state('up:active', rank=0, timeout=30)

    # spread directories over the ranks, then generate some I/O
    self._spread_directory_on_all_ranks(fs_a.id)
    self._do_spread_io_all_clients(fs_a.id)

    # give the mgr a moment to pick up the updated metrics
    time.sleep(5)

    try:
        # second sample, taken after the failover
        verifier_new = self.verify_mds_metrics(client_count=1,
                                               mul_fs=[fs_a.id, fs_b.id])
        ok, metrics_new = self._get_metrics(verifier_new, 30)
        log.debug(f'metrics={metrics_new}')
        self.assertTrue(ok)

        fs1_metadata = metrics_new['client_metadata'].get("fs1", {})
        client_a_metadata = fs1_metadata.get(client_a_name, {})
        fs1_global_new = metrics_new['global_metrics'].get("fs1", {})
        client_a_metrics_new = fs1_global_new.get(client_a_name, {})

        # the metrics should be different for the test to succeed.
        metrics_changed = client_a_metrics_new != client_a_metrics
        self.assertTrue(
            client_a_metadata and client_a_metrics_new and metrics_changed,
            "Invalid 'ceph fs perf stats' metrics after"
            f" rank0 mds of {fs_a.name} failover")
    except MaxWhileTries:
        raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics")
    finally:
        # cleanup test directories
        self._cleanup_test_dirs()

View File

@ -195,15 +195,17 @@ class FSPerfStats(object):
gid_state = FSPerfStats.get_rank0_mds_gid_state(self.module.get('fs_map'))
if not gid_state:
return
rank0_gid, state = gid_state
if (rank0_gid and rank0_gid != self.prev_rank0_gid and state == 'up:active'):
#the new rank0 MDS is up:active
ua_last_updated = time.monotonic()
if (self.rqtimer and self.rqtimer.is_alive()):
self.rqtimer.cancel()
self.rqtimer = Timer(REREGISTER_TIMER_INTERVAL,
self.re_register_queries, args=(rank0_gid, ua_last_updated,))
self.rqtimer.start()
for value in gid_state:
rank0_gid, state = value
if (rank0_gid and rank0_gid != self.prev_rank0_gid and state == 'up:active'):
#the new rank0 MDS is up:active
ua_last_updated = time.monotonic()
if (self.rqtimer and self.rqtimer.is_alive()):
self.rqtimer.cancel()
self.rqtimer = Timer(REREGISTER_TIMER_INTERVAL,
self.re_register_queries,
args=(rank0_gid, ua_last_updated,))
self.rqtimer.start()
def re_register_queries(self, rank0_gid, ua_last_updated):
#reregister queries if the metrics are the latest. Otherwise reschedule the timer and
@ -221,12 +223,15 @@ class FSPerfStats(object):
@staticmethod
def get_rank0_mds_gid_state(fsmap):
    """Collect the gid and state of every rank-0 MDS listed in *fsmap*.

    Each entry of ``fsmap['filesystems']`` is inspected; for every MDS
    in its ``mdsmap['info']`` whose ``rank`` is 0, a ``[gid, state]``
    pair is recorded.  Filesystems whose ``mdsmap`` is ``None`` are
    skipped.

    Returns a non-empty list of ``[gid, state]`` pairs, or ``None``
    (after logging a warning) when no rank-0 MDS is found.
    """
    gid_state = []
    for fs in fsmap['filesystems']:
        mds_map = fs['mdsmap']
        if mds_map is None:
            continue
        for mds_status in mds_map['info'].values():
            if mds_status['rank'] == 0:
                # Do not return here: an early return would report only
                # the first filesystem and hide every other one.
                gid_state.append([mds_status['gid'], mds_status['state']])
    if gid_state:
        return gid_state
    # Logger.warn is a deprecated alias; Logger.warning is the supported name.
    logger.warning("No rank0 mds in the fsmap")
    return None
def update_client_meta(self):