2018-07-12 13:25:53 +00:00
|
|
|
"""
|
|
|
|
CephFS sub-tasks.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import logging
|
|
|
|
import re
|
|
|
|
|
2021-03-09 21:54:34 +00:00
|
|
|
from tasks.cephfs.filesystem import Filesystem, MDSCluster
|
2018-07-12 13:25:53 +00:00
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
2021-10-01 16:05:42 +00:00
|
|
|
# MDSMap flag bits referenced by the upgrade checks below.
CEPH_MDSMAP_NOT_JOINABLE = 1 << 0
CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = 1 << 5

# Highest flag bit that takes part in the pre/post upgrade comparison.
CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY

# Mask selecting every flag bit up to and including CEPH_MDSMAP_LAST.
UPGRADE_FLAGS_MASK = (CEPH_MDSMAP_LAST << 1) - 1
|
|
|
|
def pre_upgrade_save(ctx, config):
    """
    Record per-file-system MDS map state ahead of an upgrade.

    For each file system in the current cluster status, the mdsmap epoch,
    max_mds and the flags (masked with UPGRADE_FLAGS_MASK) are stored under
    ctx['mds-upgrade-state'], keyed by the file system id, so a later check
    can verify the upgrade did not clobber them.

    ``config`` is not used.
    """

    cluster = MDSCluster(ctx)
    snapshot = cluster.status()

    # Publish the (initially empty) mapping before filling it in.
    state = {}
    ctx['mds-upgrade-state'] = state

    for filesystem in list(snapshot.get_filesystems()):
        fscid = filesystem['id']
        mdsmap = filesystem['mdsmap']
        fs_state = {
            'epoch': mdsmap['epoch'],
            'max_mds': mdsmap['max_mds'],
            'flags': mdsmap['flags'] & UPGRADE_FLAGS_MASK,
        }
        state[fscid] = fs_state
        log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}")
|
|
|
|
|
|
|
|
|
|
|
|
def post_upgrade_checks(ctx, config):
    """
    Verify that the state saved in ctx['mds-upgrade-state'] survived the
    upgrade and that the upgrade procedure was followed.

    For every file system in the current cluster status this asserts that
    max_mds and the masked mdsmap flags equal the saved values and that the
    mdsmap epoch has advanced.  It then inspects every epoch strictly between
    the saved epoch and the current one:

    * a file system saved with max_mds > 1 must, in some epoch, either have
      been marked not joinable (failed) or have had max_mds reduced to 1;
    * a file system saved with allow_standby_replay set must, in some epoch,
      have had that flag cleared.

    Raises RuntimeError if the fs was marked not joinable while only one rank
    was configured, or if it was marked not joinable and had max_mds reduced
    to 1 in the same epoch.  Raises AssertionError when any other check
    fails.

    ``config`` is not used.
    """

    saved = ctx['mds-upgrade-state']

    mdsc = MDSCluster(ctx)
    current = mdsc.status()

    for filesystem in list(current.get_filesystems()):
        fscid = filesystem['id']
        mdsmap = filesystem['mdsmap']
        fs_state = saved[fscid]
        log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}")

        # The settings must be back at their pre-upgrade values.
        assert fs_state['max_mds'] == mdsmap['max_mds']
        assert fs_state['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK)

        # The map must have changed at least once since the snapshot.
        epoch = mdsmap['epoch']
        pre_upgrade_epoch = fs_state['epoch']
        assert pre_upgrade_epoch < epoch

        multiple_max_mds = fs_state['max_mds'] > 1
        should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
        did_fail_fs = False
        did_decrease_max_mds = False
        did_disable_allow_standby_replay = False

        # Replay the intermediate epochs to see which upgrade steps happened.
        for i in range(pre_upgrade_epoch + 1, epoch):
            old_mdsmap = mdsc.status(epoch=i).get_fsmap(fscid)['mdsmap']

            if old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE:
                if not multiple_max_mds:
                    raise RuntimeError('mgr is failing fs when there is only one '
                                       f'rank in epoch {i}.')
                if old_mdsmap['max_mds'] == 1:
                    raise RuntimeError('mgr is failing fs as well the max_mds '
                                       f'is reduced in epoch {i}')
                log.debug(f"max_mds not reduced in epoch {i} as fs was failed "
                          "for carrying out rapid multi-rank mds upgrade")
                did_fail_fs = True

            if multiple_max_mds and old_mdsmap['max_mds'] == 1:
                log.debug(f"max_mds reduced in epoch {i}")
                did_decrease_max_mds = True

            if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
                log.debug(f"allow_standby_replay disabled in epoch {i}")
                did_disable_allow_standby_replay = True

        assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds
        assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay
|
|
|
|
|
|
|
|
|
2021-03-09 21:54:34 +00:00
|
|
|
def ready(ctx, config):
    """
    Wait for the daemons of every file system in the current cluster status.

    ``config`` may be None or a dict.  Its optional 'timeout' entry
    (default 300) is forwarded to Filesystem.wait_for_daemons together with
    the status snapshot taken at the start of the task.
    """

    config = {} if config is None else config
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    timeout = config.get('timeout', 300)

    # One snapshot is taken up front and shared by all the waits.
    status = MDSCluster(ctx).status()

    for filesystem in status.get_filesystems():
        Filesystem(ctx, fscid=filesystem['id']).wait_for_daemons(
            timeout=timeout, status=status)
|
|
|
|
|
2018-07-12 13:25:53 +00:00
|
|
|
def clients_evicted(ctx, config):
    """
    Check that clients are (or are not) evicted, as configured.

    ``config`` may be None or a dict.  Its optional 'clients' entry maps
    names of the form "client.<number>" to a boolean saying whether that
    client is expected to have been evicted.  When it is absent, every
    client in ctx.mounts is expected to have been evicted.

    Clients without an entry in ctx.mounts are skipped.  For the others:

    * a client expected to be evicted must have no session on any rank and
      its address must be blocklisted;
    * a client not expected to be evicted must have a session on some rank.

    Raises RuntimeError if a client name is malformed, if an evicted client
    still has a session, or if a non-evicted client has no session.  Raises
    AssertionError if an evicted client's address is not blocklisted.
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    clients = config.get('clients')

    if clients is None:
        clients = {("client."+client_id): True for client_id in ctx.mounts}

    log.info("clients is {}".format(str(clients)))

    fs = Filesystem(ctx)
    status = fs.status()

    has_session = set()
    mounts = {}
    for client in clients:
        # Raw string with an escaped dot: the separator must be a literal
        # '.', not an arbitrary character.
        match = re.match(r"^client\.([0-9]+)$", client)
        if match is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError("invalid client name: {}".format(client))
        mounts[client] = ctx.mounts.get(match.group(1))

    for rank in fs.get_ranks(status=status):
        ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status)
        for session in ls:
            for client, evicted in clients.items():
                mount = mounts.get(client)
                if mount is None:
                    continue
                if session['id'] != mount.get_global_id():
                    continue
                if evicted:
                    raise RuntimeError("client still has session: {}".format(str(session)))
                log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank']))
                has_session.add(client)

    no_session = set(clients) - has_session
    should_assert = False
    for client, evicted in clients.items():
        mount = mounts.get(client)
        if mount is None:
            continue
        if evicted:
            log.info("confirming client {} is blocklisted".format(client))
            assert fs.is_addr_blocklisted(mount.get_global_addr())
        elif client in no_session:
            log.info("client {} should not be evicted but has no session with an MDS".format(client))
            fs.is_addr_blocklisted(mount.get_global_addr())  # for debugging
            # Keep going so every offending client is logged before failing.
            should_assert = True
    if should_assert:
        raise RuntimeError("some clients which should not be evicted have no session with an MDS?")
|