mirror of
https://github.com/ceph/ceph
synced 2024-12-20 18:33:44 +00:00
7b587304a5
test systemd units restart after reboot Signed-off-by: Vasu Kulkarni <vasu@redhat.com>
143 lines
6.4 KiB
Python
143 lines
6.4 KiB
Python
"""
|
|
Systemd test
|
|
"""
|
|
import contextlib
|
|
import logging
|
|
import re
|
|
import time
|
|
|
|
from cStringIO import StringIO
|
|
from teuthology.orchestra import run
|
|
from teuthology.misc import reconnect, get_first_mon, wait_until_healthy
|
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
|
|
|
|
|
def _ps_ceph_args():
    """Return the argument list for ``sudo ps -eaf | grep ceph``."""
    return ['sudo', 'ps', '-eaf', run.Raw('|'), 'grep', 'ceph']


def _capture(remote, args, **kwargs):
    """Run ``args`` on ``remote`` and return the captured stdout as a string.

    Extra keyword arguments (e.g. ``check_status=False``) are forwarded
    unchanged to ``remote.run``.
    """
    proc = remote.run(args=args, stdout=StringIO(), **kwargs)
    return proc.stdout.getvalue()


def _stop_check_start(remote, unit, stopped_msg, failed_msg):
    """Stop one systemd unit, verify it went inactive, then start it again.

    :param remote:      remote on which the unit runs
    :param unit:        systemd unit name, e.g. ``ceph-mon@host.service``
    :param stopped_msg: message logged when the unit reports inactive
    :param failed_msg:  message logged when the unit did not go inactive
    """
    systemctl = ['sudo', 'systemctl']
    # This first status call keeps the default status checking of
    # remote.run, exactly as before.
    remote.run(args=systemctl + ['status', unit])
    remote.run(args=systemctl + ['stop', unit])
    time.sleep(4)  # immediate check will result in deactivating state
    # The exit status is deliberately not checked here; only the text of
    # the status output is inspected.
    status = _capture(remote, systemctl + ['status', unit],
                      check_status=False)
    log.info(status)
    # Use 'in', not str.find(): find() returns -1 (truthy) when the text
    # is absent and 0 (falsy) when it is at the very start.
    if 'Active: inactive' in status:
        log.info(stopped_msg)
    else:
        log.error(failed_msg)
    remote.run(args=systemctl + ['start', unit])
    time.sleep(4)


@contextlib.contextmanager
def task(ctx, config):
    """
    Systemd test task::

      - tasks:
          ceph-deploy:
          systemd:

    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors.

    For every remote in the cluster this stops and starts ``ceph.target``,
    then stops and starts the individual osd/mon/mgr/mds units found on
    that remote.  Afterwards all nodes are rebooted and the task waits for
    the cluster to become healthy again before yielding.

    :param ctx:    teuthology context; ``ctx.cluster`` is used
    :param config: task configuration, passed to ``get_first_mon``
    """
    systemctl = ['sudo', 'systemctl']
    for remote, roles in ctx.cluster.remotes.iteritems():
        remote.run(args=_ps_ceph_args())
        units = _capture(
            remote,
            systemctl + ['list-units', run.Raw('|'), 'grep', 'ceph'],
            check_status=False)
        log.info(units)
        if 'failed' in units:
            log.error("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=systemctl + ['stop', 'ceph.target'])
        target_status = _capture(remote,
                                 systemctl + ['status', 'ceph.target'],
                                 check_status=False)
        log.info(target_status)
        log.info("Checking process status")
        log.info(_capture(remote, _ps_ceph_args()))
        # Judge the stop by the systemctl status text, not by the ps
        # listing, which never contains an 'Active:' line.
        if 'Active: inactive' in target_status:
            log.info("Sucessfully stopped all ceph services")
        else:
            log.error("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=systemctl + ['start', 'ceph.target'])
        target_status = _capture(remote,
                                 systemctl + ['status', 'ceph.target'])
        log.info(target_status)
        if 'Active: active' in target_status:
            log.info("Sucessfully started all Ceph services")
        else:
            log.error("Failed to start Ceph services")
        ps_output = _capture(remote, _ps_ceph_args())
        log.info(ps_output)
        time.sleep(4)

        # test individual services start stop
        name = remote.shortname

        # The osd id is taken from the command line of a running ceph-osd
        # process in the ps listing (first match only).
        m_osd = re.search(r'--id (\d+) --setuser ceph', ps_output)
        if m_osd:
            _stop_check_start(
                remote,
                'ceph-osd@{m}.service'.format(m=m_osd.group(1)),
                "Sucessfully stopped single osd ceph service",
                "Failed to stop ceph osd services")
        if 'mon.' + name in roles:
            _stop_check_start(
                remote,
                'ceph-mon@' + name + '.service',
                "Sucessfully stopped single mon ceph service",
                "Failed to stop ceph mon service")
        if 'mgr.' + name in roles:
            _stop_check_start(
                remote,
                'ceph-mgr@' + name + '.service',
                "Sucessfully stopped single ceph mgr service",
                "Failed to stop ceph mgr service")
        if 'mds.' + name in roles:
            _stop_check_start(
                remote,
                'ceph-mds@' + name + '.service',
                "Sucessfully stopped single ceph mds service",
                "Failed to stop ceph mds service")

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=_ps_ceph_args())
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.iterkeys()
    wait_until_healthy(ctx, mon_remote)
    yield
|