ceph/qa/tasks/systemd.py

"""
Systemd test
"""
import contextlib
import logging
import re
import time

from teuthology.orchestra import run
from teuthology.misc import reconnect, get_first_mon, wait_until_healthy

log = logging.getLogger(__name__)

def _remote_service_status(remote, service):
    status = remote.sh('sudo systemctl status %s' % service,
                       check_status=False)
    return status

@contextlib.contextmanager
def task(ctx, config):
    """
      - tasks:
          ceph-deploy:
          systemd:

    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors
    """
    for remote, roles in ctx.cluster.remotes.items():
        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                         'grep', 'ceph'])
        units = remote.sh('sudo systemctl list-units | grep ceph',
                          check_status=False)
        log.info(units)
        if units.find('failed'):
            log.info("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        log.info("Checking process status")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        if ps_eaf.find('Active: inactive'):
            log.info("Successfully stopped all ceph services")
        else:
            log.info("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        if status.find('Active: active'):
            log.info("Successfully started all Ceph services")
        else:
            log.info("info", "Failed to start Ceph services")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        log.info(ps_eaf)
        time.sleep(4)

        # test individual services start stop
        name = remote.shortname
        mon_name = 'ceph-mon@' + name + '.service'
        mds_name = 'ceph-mds@' + name + '.service'
        mgr_name = 'ceph-mgr@' + name + '.service'
        mon_role_name = 'mon.' + name
        mds_role_name = 'mds.' + name
        mgr_role_name = 'mgr.' + name
        m_osd = re.search('--id (\d+) --setuser ceph', ps_eaf)
        if m_osd:
            osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
            remote.run(args=['sudo', 'systemctl', 'status',
                             osd_service])
            remote.run(args=['sudo', 'systemctl', 'stop',
                             osd_service])
            time.sleep(4)  # immediate check will result in deactivating state
            status = _remote_service_status(remote, osd_service)
            log.info(status)
            if status.find('Active: inactive'):
                log.info("Successfully stopped single osd ceph service")
            else:
                log.info("Failed to stop ceph osd services")
            remote.sh(['sudo', 'systemctl', 'start', osd_service])
            time.sleep(4)
        if mon_role_name in roles:
            remote.run(args=['sudo', 'systemctl', 'status', mon_name])
            remote.run(args=['sudo', 'systemctl', 'stop', mon_name])
            time.sleep(4)  # immediate check will result in deactivating state
            status = _remote_service_status(remote, mon_name)
            if status.find('Active: inactive'):
                log.info("Successfully stopped single mon ceph service")
            else:
                log.info("Failed to stop ceph mon service")
            remote.run(args=['sudo', 'systemctl', 'start', mon_name])
            time.sleep(4)
        if mgr_role_name in roles:
            remote.run(args=['sudo', 'systemctl', 'status', mgr_name])
            remote.run(args=['sudo', 'systemctl', 'stop', mgr_name])
            time.sleep(4)  # immediate check will result in deactivating state
            status = _remote_service_status(remote, mgr_name)
            if status.find('Active: inactive'):
                log.info("Successfully stopped single ceph mgr service")
            else:
                log.info("Failed to stop ceph mgr service")
            remote.run(args=['sudo', 'systemctl', 'start', mgr_name])
            time.sleep(4)
        if mds_role_name in roles:
            remote.run(args=['sudo', 'systemctl', 'status', mds_name])
            remote.run(args=['sudo', 'systemctl', 'stop', mds_name])
            time.sleep(4)  # immediate check will result in deactivating state
            status = _remote_service_status(remote, mds_name)
            if status.find('Active: inactive'):
                log.info("Successfully stopped single ceph mds service")
            else:
                log.info("Failed to stop ceph mds service")
            remote.run(args=['sudo', 'systemctl', 'start', mds_name])
            time.sleep(4)

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                          'grep', 'ceph'])
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.keys()
    wait_until_healthy(ctx, mon_remote, use_sudo=True)
    yield
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`"""`
			`Systemd test`
			`"""`
			`import contextlib`
			`import logging`
			`import re`
			`import time`

			`from teuthology.orchestra import run`
Add reboot case for systemd test test systemd units restart after reboot Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2017-03-29 16:27:20 +00:00			`from teuthology.misc import reconnect, get_first_mon, wait_until_healthy`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00
			`log = logging.getLogger(__name__)`

qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`def _remote_service_status(remote, service):`
			`status = remote.sh('sudo systemctl status %s' % service,`
			`check_status=False)`
			`return status`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00
			`@contextlib.contextmanager`
			`def task(ctx, config):`
			`"""`
			`- tasks:`
			`ceph-deploy:`
			`systemd:`

			`Test ceph systemd services can start, stop and restart and`
			`check for any failed services and report back errors`
			`"""`
qa: get rid of iteritems for python3 compatibility Fixes: https://tracker.ceph.com/issues/42267 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2019-10-09 12:36:58 +00:00			`for remote, roles in ctx.cluster.remotes.items():`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`remote.run(args=['sudo', 'ps', '-eaf', run.Raw('\|'),`
			`'grep', 'ceph'])`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`units = remote.sh('sudo systemctl list-units \| grep ceph',`
			`check_status=False)`
			`log.info(units)`
			`if units.find('failed'):`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`log.info("Ceph services in failed state")`

Few comment fixes Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-06-29 18:30:02 +00:00			`# test overall service stop and start using ceph.target`
			`# ceph.target tests are meant for ceph systemd tests`
			`# and not actual process testing using 'ps'`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`log.info("Stopping all Ceph services")`
			`remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`status = _remote_service_status(remote, 'ceph.target')`
			`log.info(status)`
Few comment fixes Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-06-29 18:30:02 +00:00			`log.info("Checking process status")`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`ps_eaf = remote.sh('sudo ps -eaf \| grep ceph')`
			`if ps_eaf.find('Active: inactive'):`
qa: fix typos Signed-off-by: Kefu Chai <kchai@redhat.com> 2018-09-18 03:21:13 +00:00			`log.info("Successfully stopped all ceph services")`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`else:`
			`log.info("Failed to stop ceph services")`
Few comment fixes Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-06-29 18:30:02 +00:00
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`log.info("Starting all Ceph services")`
			`remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`status = _remote_service_status(remote, 'ceph.target')`
			`log.info(status)`
			`if status.find('Active: active'):`
qa: fix typos Signed-off-by: Kefu Chai <kchai@redhat.com> 2018-09-18 03:21:13 +00:00			`log.info("Successfully started all Ceph services")`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`else:`
			`log.info("info", "Failed to start Ceph services")`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`ps_eaf = remote.sh('sudo ps -eaf \| grep ceph')`
			`log.info(ps_eaf)`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`time.sleep(4)`
Few comment fixes Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-06-29 18:30:02 +00:00
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`# test individual services start stop`
			`name = remote.shortname`
			`mon_name = 'ceph-mon@' + name + '.service'`
			`mds_name = 'ceph-mds@' + name + '.service'`
tasks: update to run ceph-mgr daemons Signed-off-by: John Spray <john.spray@redhat.com> 2016-09-27 11:22:45 +00:00			`mgr_name = 'ceph-mgr@' + name + '.service'`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`mon_role_name = 'mon.' + name`
			`mds_role_name = 'mds.' + name`
tasks: update to run ceph-mgr daemons Signed-off-by: John Spray <john.spray@redhat.com> 2016-09-27 11:22:45 +00:00			`mgr_role_name = 'mgr.' + name`
qa/tasks: flake8 fixes Signed-off-by: Kefu Chai <kchai@redhat.com> 2020-06-19 11:26:10 +00:00			`m_osd = re.search('--id (\d+) --setuser ceph', ps_eaf)`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`if m_osd:`
			`osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))`
			`remote.run(args=['sudo', 'systemctl', 'status',`
			`osd_service])`
			`remote.run(args=['sudo', 'systemctl', 'stop',`
			`osd_service])`
Few comment fixes Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-06-29 18:30:02 +00:00			`time.sleep(4) # immediate check will result in deactivating state`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`status = _remote_service_status(remote, osd_service)`
			`log.info(status)`
			`if status.find('Active: inactive'):`
qa: fix typos Signed-off-by: Kefu Chai <kchai@redhat.com> 2018-09-18 03:21:13 +00:00			`log.info("Successfully stopped single osd ceph service")`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`else:`
			`log.info("Failed to stop ceph osd services")`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`remote.sh(['sudo', 'systemctl', 'start', osd_service])`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`time.sleep(4)`
			`if mon_role_name in roles:`
			`remote.run(args=['sudo', 'systemctl', 'status', mon_name])`
			`remote.run(args=['sudo', 'systemctl', 'stop', mon_name])`
Few comment fixes Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-06-29 18:30:02 +00:00			`time.sleep(4) # immediate check will result in deactivating state`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`status = _remote_service_status(remote, mon_name)`
			`if status.find('Active: inactive'):`
qa: fix typos Signed-off-by: Kefu Chai <kchai@redhat.com> 2018-09-18 03:21:13 +00:00			`log.info("Successfully stopped single mon ceph service")`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`else:`
			`log.info("Failed to stop ceph mon service")`
			`remote.run(args=['sudo', 'systemctl', 'start', mon_name])`
			`time.sleep(4)`
tasks: update to run ceph-mgr daemons Signed-off-by: John Spray <john.spray@redhat.com> 2016-09-27 11:22:45 +00:00			`if mgr_role_name in roles:`
			`remote.run(args=['sudo', 'systemctl', 'status', mgr_name])`
			`remote.run(args=['sudo', 'systemctl', 'stop', mgr_name])`
			`time.sleep(4) # immediate check will result in deactivating state`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`status = _remote_service_status(remote, mgr_name)`
			`if status.find('Active: inactive'):`
qa: fix typos Signed-off-by: Kefu Chai <kchai@redhat.com> 2018-09-18 03:21:13 +00:00			`log.info("Successfully stopped single ceph mgr service")`
tasks: update to run ceph-mgr daemons Signed-off-by: John Spray <john.spray@redhat.com> 2016-09-27 11:22:45 +00:00			`else:`
			`log.info("Failed to stop ceph mgr service")`
			`remote.run(args=['sudo', 'systemctl', 'start', mgr_name])`
			`time.sleep(4)`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`if mds_role_name in roles:`
			`remote.run(args=['sudo', 'systemctl', 'status', mds_name])`
			`remote.run(args=['sudo', 'systemctl', 'stop', mds_name])`
Few comment fixes Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-06-29 18:30:02 +00:00			`time.sleep(4) # immediate check will result in deactivating state`
qa/tasks/systemd: get rid of cStringIO for py3 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2020-02-21 19:59:47 +00:00			`status = _remote_service_status(remote, mds_name)`
			`if status.find('Active: inactive'):`
qa: fix typos Signed-off-by: Kefu Chai <kchai@redhat.com> 2018-09-18 03:21:13 +00:00			`log.info("Successfully stopped single ceph mds service")`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`else:`
			`log.info("Failed to stop ceph mds service")`
			`remote.run(args=['sudo', 'systemctl', 'start', mds_name])`
			`time.sleep(4)`
Add reboot case for systemd test test systemd units restart after reboot Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2017-03-29 16:27:20 +00:00
			`# reboot all nodes and verify the systemd units restart`
			`# workunit that runs would fail if any of the systemd unit doesnt start`
			`ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)`
			`# avoid immediate reconnect`
			`time.sleep(120)`
			`reconnect(ctx, 480) # reconnect all nodes`
			`# for debug info`
			`ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('\|'),`
			`'grep', 'ceph'])`
			`# wait for HEALTH_OK`
			`mon = get_first_mon(ctx, config)`
qa: get rid of iterkeys for py3 compatibility Fixes: https://tracker.ceph.com/issues/42287 Signed-off-by: Kyr Shatskyy <kyrylo.shatskyy@suse.com> 2019-10-11 15:57:47 +00:00			`(mon_remote,) = ctx.cluster.only(mon).remotes.keys()`
use sudo to check check health Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2017-04-11 20:51:47 +00:00			`wait_until_healthy(ctx, mon_remote, use_sudo=True)`
Systemd script to test ceph init files Signed-off-by: Vasu Kulkarni <vasu@redhat.com> 2016-05-18 21:32:59 +00:00			`yield`