2016-05-18 21:32:59 +00:00
|
|
|
"""
|
|
|
|
Systemd test
|
|
|
|
"""
|
|
|
|
import contextlib
|
|
|
|
import logging
|
|
|
|
import re
|
|
|
|
import time
|
|
|
|
|
|
|
|
from teuthology.orchestra import run
|
2017-03-29 16:27:20 +00:00
|
|
|
from teuthology.misc import reconnect, get_first_mon, wait_until_healthy
|
2016-05-18 21:32:59 +00:00
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
2020-02-21 19:59:47 +00:00
|
|
|
def _remote_service_status(remote, service):
    """
    Return the text printed by ``systemctl status`` for one unit.

    :param remote:  object exposing ``sh()``; the command is run through it
    :param service: systemd unit name, e.g. ``ceph.target``
    :returns:       whatever ``remote.sh`` returns for the status command
    """
    # NOTE(review): check_status=False presumably keeps a non-zero exit
    # (systemctl reports one for stopped units) from raising -- confirm
    # against the remote.sh implementation.
    command = 'sudo systemctl status %s' % service
    return remote.sh(command, check_status=False)
|
2016-05-18 21:32:59 +00:00
|
|
|
|
|
|
|
def _cycle_service(remote, service, stopped_msg, failed_msg,
                   settle_seconds=4):
    """
    Stop one systemd unit, report whether it went inactive, and start it
    again.

    :param remote:         remote on which the unit runs
    :param service:        systemd unit name, e.g. ``ceph-mon@host.service``
    :param stopped_msg:    message logged when the unit reports
                           ``Active: inactive`` after the stop
    :param failed_msg:     message logged otherwise
    :param settle_seconds: pause after the stop and after the start
    """
    remote.run(args=['sudo', 'systemctl', 'status', service])
    remote.run(args=['sudo', 'systemctl', 'stop', service])
    # an immediate check would still report the "deactivating" state
    time.sleep(settle_seconds)
    status = _remote_service_status(remote, service)
    log.info(status)
    # use "in": str.find() returns -1 (truthy) when the text is missing
    if 'Active: inactive' in status:
        log.info(stopped_msg)
    else:
        log.info(failed_msg)
    remote.run(args=['sudo', 'systemctl', 'start', service])
    time.sleep(settle_seconds)


@contextlib.contextmanager
def task(ctx, config):
    """
    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors.

    Usage::

      - tasks:
          ceph-deploy:
          systemd:

    For every remote in the cluster the whole ``ceph.target`` is stopped
    and started, then the individual osd/mon/mgr/mds units found on that
    remote are stopped and started one at a time.  Finally all nodes are
    rebooted and the task waits for the cluster to become healthy again
    before yielding.

    Failed state checks are only logged; they do not raise.

    :param ctx:    teuthology context; ``ctx.cluster`` is used to reach
                   the remotes
    :param config: task configuration, passed on to ``get_first_mon``
    """
    for remote, roles in ctx.cluster.remotes.items():
        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                         'grep', 'ceph'])
        units = remote.sh('sudo systemctl list-units | grep ceph',
                          check_status=False)
        log.info(units)
        if 'failed' in units:
            log.info("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        log.info("Checking process status")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        log.info(ps_eaf)
        # "Active: ..." is printed by systemctl status, not by ps, so the
        # verdict has to come from the status text
        if 'Active: inactive' in status:
            log.info("Successfully stopped all ceph services")
        else:
            log.info("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        if 'Active: active' in status:
            log.info("Successfully started all Ceph services")
        else:
            log.info("Failed to start Ceph services")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        log.info(ps_eaf)
        time.sleep(4)

        # test individual services start stop
        name = remote.shortname
        mon_name = 'ceph-mon@' + name + '.service'
        mds_name = 'ceph-mds@' + name + '.service'
        mgr_name = 'ceph-mgr@' + name + '.service'
        mon_role_name = 'mon.' + name
        mds_role_name = 'mds.' + name
        mgr_role_name = 'mgr.' + name

        # pick the id of the first ceph-osd process listed by ps, if any
        m_osd = re.search(r'--id (\d+) --setuser ceph', ps_eaf)
        if m_osd:
            osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
            _cycle_service(remote, osd_service,
                           "Successfully stopped single osd ceph service",
                           "Failed to stop ceph osd services")
        if mon_role_name in roles:
            _cycle_service(remote, mon_name,
                           "Successfully stopped single mon ceph service",
                           "Failed to stop ceph mon service")
        if mgr_role_name in roles:
            _cycle_service(remote, mgr_name,
                           "Successfully stopped single ceph mgr service",
                           "Failed to stop ceph mgr service")
        if mds_role_name in roles:
            _cycle_service(remote, mds_name,
                           "Successfully stopped single ceph mds service",
                           "Failed to stop ceph mds service")

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                          'grep', 'ceph'])
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.keys()
    wait_until_healthy(ctx, mon_remote, use_sudo=True)
    yield
|