mirror of
https://github.com/ceph/ceph
synced 2025-01-21 02:31:19 +00:00
task/restart: Restart task for testing daemon kill
The ceph daemons support being killed at a specific code point with a config option. In some cases, we want to test a kill point only once for a given daemon run (such as replay that only occurs during daemon startup). This task allows running a script or executable and (when the script sends a command to the task) restarting a daemon with a temporary config that has the appropriate kill point set. Once the daemon asserts and gets restarted, the original config is used. Adds a specific restart_with_args() method to the DaemonState in the ceph task. Right now this task follows the workunit task closely, but uses stdout/stdin to specify when to restart a daemon. Signed-off-by: Sam Lang <sam.lang@inktank.com>
This commit is contained in:
parent
9e81ff58e5
commit
6fe1deadbf
@ -10,16 +10,17 @@ log = logging.getLogger(__name__)
|
||||
|
||||
class RemoteProcess(object):
|
||||
__slots__ = [
|
||||
'command', 'stdin', 'stdout', 'stderr', 'exitstatus',
|
||||
'command', 'stdin', 'stdout', 'stderr', 'exitstatus', 'exited',
|
||||
# for orchestra.remote.Remote to place a backreference
|
||||
'remote',
|
||||
]
|
||||
def __init__(self, command, stdin, stdout, stderr, exitstatus):
|
||||
def __init__(self, command, stdin, stdout, stderr, exitstatus, exited):
|
||||
self.command = command
|
||||
self.stdin = stdin
|
||||
self.stdout = stdout
|
||||
self.stderr = stderr
|
||||
self.exitstatus = exitstatus
|
||||
self.exited = exited
|
||||
|
||||
class Raw(object):
|
||||
def __init__(self, value):
|
||||
@ -65,6 +66,9 @@ def execute(client, args):
|
||||
status = None
|
||||
return status
|
||||
|
||||
def exitstatus_ready():
|
||||
return out.channel.exit_status_ready()
|
||||
|
||||
r = RemoteProcess(
|
||||
command=cmd,
|
||||
stdin=in_,
|
||||
@ -73,6 +77,7 @@ def execute(client, args):
|
||||
# this is a callable that will block until the status is
|
||||
# available
|
||||
exitstatus=get_exitstatus,
|
||||
exited=exitstatus_ready,
|
||||
)
|
||||
return r
|
||||
|
||||
|
@ -48,12 +48,32 @@ class DaemonState(object):
|
||||
self.proc = self.remote.run(*cmd_args, **cmd_kwargs)
|
||||
self.log.info('Started')
|
||||
|
||||
def restart_with_args(self, extra_args):
    """
    Restart the daemon once with ``extra_args`` appended to its arguments.

    Any currently tracked process is stopped first.  The stored
    ``command_args`` / ``command_kwargs`` are not modified, so the extra
    arguments apply to this one start only.

    :param extra_args: list of additional command-line arguments to append
                       to the stored ``args`` list for this run.
    """
    from copy import deepcopy

    self.log.info('Restarting')
    if self.proc is not None:
        self.log.debug('stopping old one...')
        self.stop()

    # Only the 'args' list is extended, so it alone needs a deep copy;
    # every other keyword argument can be shared through a shallow copy.
    run_kwargs = self.command_kwargs.copy()
    run_args = deepcopy(self.command_kwargs['args'])
    run_args.extend(extra_args)
    run_kwargs['args'] = run_args

    positional = list(self.command_args)
    self.proc = self.remote.run(*positional, **run_kwargs)
    self.log.info('Started')
|
||||
|
||||
|
||||
def running(self):
    """
    Report whether the daemon process is tracked and has not exited.

    ``RemoteProcess.exited`` is populated with a callable (the
    ``exitstatus_ready`` closure), and a function object is always truthy,
    so testing the attribute directly would report "not running" for every
    process.  The attribute is therefore called when it is callable; a
    plain boolean value is still accepted.

    :returns: True if a process is tracked and it has not exited yet,
              False otherwise.
    """
    proc = self.proc
    if proc is None:
        return False
    exited = proc.exited
    if callable(exited):
        exited = exited()
    return not exited
|
||||
|
||||
def reset(self):
    """Drop the tracked process handle without stopping or waiting on it."""
    self.proc = None
|
||||
|
||||
def wait_for_exit(self):
    """
    Block until the tracked process exits, then forget it.

    Does nothing when no process is tracked.  Any exception raised by
    ``run.wait`` (for example when the daemon exits with a failure status)
    still propagates to the caller, but the process handle is cleared
    first so that a later restart does not try to stop a process that is
    already gone.
    """
    if self.proc:
        try:
            run.wait([self.proc])
        finally:
            # Clear the handle whether or not the wait reported a failure.
            self.proc = None
|
||||
|
||||
class CephState(object):
|
||||
def __init__(self):
|
||||
|
151
teuthology/task/restart.py
Normal file
151
teuthology/task/restart.py
Normal file
@ -0,0 +1,151 @@
|
||||
import logging
|
||||
import pipes
|
||||
|
||||
from teuthology import misc as teuthology
|
||||
from teuthology.orchestra import run as tor
|
||||
|
||||
from ..orchestra import run
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def restart_daemon(ctx, config, role, id_, *args):
    """
    Wait for daemon ``<role>.<id_>`` to exit on its own, then start it again.

    :param ctx: run context; the daemon is looked up via ``ctx.daemons``.
    :param config: task configuration (not used here).
    :param role: daemon role name.
    :param id_: daemon id within the role.
    :param args: optional flat sequence ``key1, value1, key2, value2, ...``;
                 each pair becomes a ``--key=value`` argument for this
                 restart.  A trailing key without a value is dropped by the
                 pairing.  With no args a plain restart is performed.
    """
    log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_))
    daemon = ctx.daemons.get_daemon(role, id_)

    log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_))
    try:
        daemon.wait_for_exit()
    except tor.CommandFailedError as e:
        # A failed exit status is only logged here, not treated as fatal.
        log.debug('Command Failed: {e}'.format(e=e))

    if not args:
        log.debug('Doing restart of {r}.{i} daemon...'.format(r=role, i=id_))
        daemon.restart()
        return

    keys = args[0::2]
    values = args[1::2]
    confargs = ['--{k}={v}'.format(k=key, v=value)
                for key, value in zip(keys, values)]
    log.debug('Doing restart of {r}.{i} daemon with args: {a}...'.format(r=role, i=id_, a=confargs))
    daemon.restart_with_args(confargs)
|
||||
|
||||
def get_tests(ctx, config, role, remote, testdir):
    """
    Fetch qa/workunits onto ``remote`` and list the executables it contains.

    The git ref is taken from the first of the ``branch``, ``sha1`` and
    ``tag`` config keys that is set, falling back to ``HEAD``.  The tree is
    unpacked into ``<testdir>/restart.<role>``, ``make`` is run if a
    Makefile is present, and the relative paths of all executable files are
    written NUL-separated to ``<testdir>/restarts.list``.

    :param ctx: run context (not used here).
    :param config: task configuration dict.
    :param role: role name, used for the directory name and the logger.
    :param remote: remote on which the commands are run.
    :param testdir: base test directory on the remote.
    :returns: tuple ``(srcdir, restarts)`` where ``restarts`` is the sorted
              list of executable paths relative to ``srcdir``.
    """
    srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role)
    list_path = '{tdir}/restarts.list'.format(tdir=testdir)

    refspec = 'HEAD'
    for key in ('branch', 'sha1', 'tag'):
        candidate = config.get(key)
        if candidate is not None:
            refspec = candidate
            break
    log.info('Pulling restart qa/workunits from ref %s', refspec)

    remote.run(
        logger=log.getChild(role),
        args=[
            'mkdir', '--', srcdir,
            run.Raw('&&'),
            'git',
            'archive',
            '--remote=git://ceph.newdream.net/git/ceph.git',
            '%s:qa/workunits' % refspec,
            run.Raw('|'),
            'tar',
            '-C', srcdir,
            '-x',
            '-f-',
            run.Raw('&&'),
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>' + list_path),
        ],
    )

    listing = teuthology.get_file(remote, list_path)
    # find terminates every entry with NUL, so splitting always leaves an
    # empty trailing element; drop empty names so they are never treated as
    # a script to run.
    restarts = sorted(name for name in listing.split('\0') if name)
    return (srcdir, restarts)
|
||||
|
||||
def task(ctx, config):
    """
    Execute commands and allow daemon restart with config options.

    Each process executed can output to stdout restart commands of the form:
        restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
    This will restart the daemon <role>.<id> once with the specified config
    values, passed to the daemon as extra ``--<conf_key>=<conf_value>``
    arguments for that one start.  A bare ``restart <role> <id>`` restarts
    the daemon without extra arguments.  After each restart the task writes
    ``restarted`` to the script's stdin.  The script ends the exchange by
    printing ``done`` or by closing its stdout.

    This task does not kill a running daemon, it assumes the daemon will abort on an
    assert specified in the config.

        tasks:
        - install:
        - ceph:
        - restart:
            exec:
              client.0:
                - test_backtraces.py

    An optional ``env`` dict in the config is exported (shell-quoted) to
    every script.

    :raises AssertionError: on an invalid config or an unknown script command.
    :raises Exception: when a script's exit status compares unequal to 0.
    """
    assert isinstance(config, dict), "task kill got invalid config"

    testdir = teuthology.get_testdir(ctx)

    assert 'exec' in config, "config requires exec key with <role>: <command> entries"
    for role, role_tasks in config['exec'].iteritems():
        log.info('restart for role {r}'.format(r=role))
        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
        srcdir, restarts = get_tests(ctx, config, role, remote, testdir)
        log.info('Running command on role %s host %s', role, remote.name)
        spec = '{spec}'.format(spec=role_tasks[0])
        log.info('Restarts list: %s', restarts)
        log.info('Spec is %s', spec)
        # A script is selected on an exact match or when the spec is a
        # substring of its relative path.
        to_run = [w for w in restarts if w == role_tasks or w.find(spec) != -1]
        log.info('To run: %s', to_run)
        for c in to_run:
            log.info('Running restart script %s...', c)
            args = [
                run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                run.Raw('PYTHONPATH="$PYTHONPATH:{tdir}/binary/usr/local/lib/python2.7/dist-packages:{tdir}/binary/usr/local/lib/python2.6/dist-packages"'.format(tdir=testdir)),
                ]
            env = config.get('env')
            if env is not None:
                for var, val in env.iteritems():
                    quoted_val = pipes.quote(val)
                    env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                    args.append(run.Raw(env_arg))
            args.extend([
                '{tdir}/enable-coredump'.format(tdir=testdir),
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=testdir),
                '{srcdir}/{c}'.format(
                    srcdir=srcdir,
                    c=c,
                    ),
                ])
            proc = remote.run(
                args=args,
                stdout=tor.PIPE,
                stdin=tor.PIPE,
                stderr=log,
                wait=False,
                )
            log.info('waiting for a command from script')
            while True:
                line = proc.stdout.readline()
                if not line:
                    # EOF: the script closed its stdout.
                    break
                log.debug('script command: {c}'.format(c=line))
                cmd = line.strip().split(' ')
                # cmd is a list of words, so the command name must be
                # compared against its first element, not the whole list.
                if cmd[0] == "done":
                    break
                assert cmd[0] == 'restart', "script sent invalid command request to kill task"
                # cmd should be: restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
                # or to clear, just: restart <role> <id>
                restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:])
                # Tell the script the restart has been issued.
                proc.stdin.writelines(['restarted\n'])
                proc.stdin.flush()
            tor.wait([proc])
            # NOTE(review): the process was started with wait=False; confirm
            # that exitstatus is a plain integer at this point and not a
            # deferred result object, otherwise this comparison always fails.
            e = proc.exitstatus
            if e != 0:
                raise Exception('restart task got non-zero exit status {d} from script: {s}'.format(d=e, s=c))
|
Loading…
Reference in New Issue
Block a user