2011-07-08 18:37:20 +00:00
|
|
|
import argparse
|
2013-08-23 20:43:18 +00:00
|
|
|
import fcntl
|
2011-07-08 18:37:20 +00:00
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import subprocess
|
2013-08-23 14:59:48 +00:00
|
|
|
import shutil
|
2011-07-08 18:37:20 +00:00
|
|
|
import sys
|
|
|
|
import tempfile
|
2013-08-27 22:58:14 +00:00
|
|
|
import time
|
2011-07-08 18:37:20 +00:00
|
|
|
import yaml
|
|
|
|
|
|
|
|
import beanstalkc
|
|
|
|
|
2013-09-20 20:12:02 +00:00
|
|
|
from .config import config as teuth_config
|
|
|
|
from . import safepath
|
2011-07-08 18:37:20 +00:00
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
2013-08-23 20:43:18 +00:00
|
|
|
# simple flock class
|
|
|
|
class filelock(object):
    """
    Exclusive advisory lock backed by a lock file.

    The lock file ``fn`` is opened for writing (created or truncated) on
    acquire() and locked with fcntl.lockf(LOCK_EX), which blocks until the
    lock can be taken.  release() unlocks and closes the file.

    Can also be used as a context manager::

        with filelock('/path/to/thing.lock'):
            ...
    """

    def __init__(self, fn):
        # fn: path of the lock file
        self.fn = fn
        # open file object while the lock is held, None otherwise
        self.fd = None

    def acquire(self):
        """Open the lock file and block until the exclusive lock is held."""
        assert not self.fd, 'lock is already held'
        fd = open(self.fn, 'w')
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except Exception:
            # Don't leak the descriptor (or leave a half-acquired state
            # behind) if locking fails.
            fd.close()
            raise
        self.fd = fd

    def release(self):
        """Drop the lock and close the lock file."""
        assert self.fd, 'lock is not held'
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # Closing the descriptor also drops the lock, so the object is
            # always reusable after release(), even if LOCK_UN failed.
            self.fd.close()
            self.fd = None

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
|
|
|
|
|
2013-08-22 19:47:18 +00:00
|
|
|
|
2011-07-08 18:37:20 +00:00
|
|
|
def connect(ctx):
    """
    Open a beanstalk connection to the queue named in the teuthology config.

    Reads 'queue_host' and 'queue_port' from ctx.teuthology_config and
    returns the resulting beanstalkc.Connection.
    """
    queue_settings = ctx.teuthology_config
    return beanstalkc.Connection(
        host=queue_settings['queue_host'],
        port=queue_settings['queue_port'],
    )
|
|
|
|
|
2013-08-22 20:30:31 +00:00
|
|
|
|
2013-08-23 15:08:01 +00:00
|
|
|
def fetch_teuthology_branch(path, branch='master'):
    """
    Make sure we have the correct teuthology branch checked out and up-to-date

    path is the directory of the checkout; it is cloned if it does not exist
    yet, otherwise fetched (at most once per minute).  The checkout is then
    hard-reset to origin/<branch> and its ./bootstrap script is run with
    NO_CLOBBER=1 in the environment.

    A '<path>.lock' file lock is held for the whole operation so only one
    worker touches the checkout at a time.

    Raises subprocess.CalledProcessError if a git command or ./bootstrap
    fails.  If the reset to origin/<branch> fails, the checkout directory is
    removed before the error is re-raised.
    """
    # only let one worker create/update the checkout at a time
    lock = filelock('%s.lock' % path)
    lock.acquire()
    try:
        if not os.path.isdir(path):
            log.info("Cloning %s from upstream", branch)
            teuthology_git_upstream = (teuth_config.ceph_git_base_url +
                                       'teuthology.git')
            log.info(
                subprocess.check_output(('git', 'clone', '--branch', branch,
                                         teuthology_git_upstream, path),
                                        cwd=os.path.dirname(path))
            )
        elif time.time() - os.stat(path).st_mtime > 60:
            # only do this at most once per minute.  The checkout's own
            # mtime is the "last fetched" marker: it is bumped right after
            # each successful fetch below.
            log.info("Fetching %s from upstream", branch)
            log.info(
                subprocess.check_output(('git', 'fetch', '-p', 'origin'),
                                        cwd=path)
            )
            # Equivalent of `touch path`: set atime/mtime to now.
            os.utime(path, None)
        else:
            log.info("%s was just updated; assuming it is current", branch)

        # This try/except block will notice if the requested branch doesn't
        # exist, whether it was cloned or fetched.
        try:
            subprocess.check_output(
                ('git', 'reset', '--hard', 'origin/%s' % branch),
                cwd=path,
            )
        except subprocess.CalledProcessError:
            log.exception("teuthology branch not found: %s", branch)
            shutil.rmtree(path)
            raise

        log.info("Bootstrapping %s", path)
        # This magic makes the bootstrap script not attempt to clobber an
        # existing virtualenv. But the branch's bootstrap needs to actually
        # check for the NO_CLOBBER variable.
        env = os.environ.copy()
        env['NO_CLOBBER'] = '1'
        log.info(
            subprocess.check_output(('./bootstrap',), cwd=path, env=env)
        )

    finally:
        lock.release()
|
2013-08-23 15:08:01 +00:00
|
|
|
|
2011-07-08 18:37:20 +00:00
|
|
|
def worker():
    """
    Command-line entry point: pull jobs off a beanstalk tube and run them.

    Parses the command line, sets up file logging under --log-dir, checks
    that --archive-dir exists, then loops forever reserving jobs from the
    requested tube.  Each job is buried as soon as it is reserved, its
    teuthology branch is checked out and bootstrapped under $HOME, and then
    either teuthology-results is spawned (for a 'last_in_suite' job) or the
    job itself is run via run_job().  The job is deleted from the queue
    afterwards.
    """
    parser = argparse.ArgumentParser(description="""
Grab jobs from a beanstalk queue and run the teuthology tests they
describe. One job is run at a time.
""")
    option_table = (
        (('-v', '--verbose'), dict(
            action='store_true', default=None,
            help='be more verbose',
        )),
        (('--archive-dir',), dict(
            metavar='DIR',
            help='path under which to archive results',
            required=True,
        )),
        (('-l', '--log-dir'), dict(
            help='path in which to store logs',
            required=True,
        )),
        (('-t', '--tube'), dict(
            help='which beanstalk tube to read jobs from',
            required=True,
        )),
    )
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    ctx = parser.parse_args()

    # One log file per worker process, named after the tube and the pid.
    log_file = os.path.join(ctx.log_dir, 'worker.{tube}.{pid}'.format(
        pid=os.getpid(),
        tube=ctx.tube,
    ))
    logging.basicConfig(
        level=logging.DEBUG if ctx.verbose else logging.INFO,
        filename=log_file,
        format='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    if not os.path.isdir(ctx.archive_dir):
        sys.exit("{prog}: archive directory must exist: {path}".format(
            prog=os.path.basename(sys.argv[0]),
            path=ctx.archive_dir,
        ))

    from teuthology.misc import read_config
    read_config(ctx)

    queue = connect(ctx)
    queue.watch(ctx.tube)
    queue.ignore('default')

    while True:
        job = queue.reserve(timeout=60)
        if job is None:
            # nothing arrived within the timeout; poll again
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        log.debug('Reserved job %d', job.jid)
        log.debug('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)

        job_id = str(job.jid)
        job_config['job_id'] = job_id
        run_dir_name = safepath.munge(job_config['name'])
        job_config['archive_path'] = os.path.join(
            ctx.archive_dir, run_dir_name, job_id)

        # If the teuthology branch was not specified, default to master and
        # store that value.
        branch = job_config.get('teuthology_branch', 'master')
        job_config['teuthology_branch'] = branch

        # Each branch gets its own checkout: $HOME/teuthology-<branch>
        checkout_dir = os.path.join(os.getenv("HOME"),
                                    'teuthology-' + branch)
        fetch_teuthology_branch(path=checkout_dir, branch=branch)

        bin_dir = os.path.join(checkout_dir, 'virtualenv', 'bin')
        if not os.path.isdir(bin_dir):
            raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                               (branch, bin_dir))

        if not job_config.get('last_in_suite'):
            log.debug('Creating archive dir...')
            safepath.makedirs(ctx.archive_dir, run_dir_name)
            log.info('Running job %d', job.jid)
            run_job(job_config, bin_dir)
        else:
            log.debug('Generating coverage for %s', job_config['name'])
            results_cmd = [
                os.path.join(bin_dir, 'teuthology-results'),
                '--timeout',
                str(job_config.get('results_timeout', 21600)),
                '--email',
                job_config['email'],
                '--archive-dir',
                os.path.join(ctx.archive_dir, run_dir_name),
                '--name',
                job_config['name'],
            ]
            # started in the background; the worker does not wait for it
            subprocess.Popen(args=results_cmd)
        job.delete()
|
2011-07-08 18:37:20 +00:00
|
|
|
|
2013-08-22 19:47:18 +00:00
|
|
|
|
2013-09-11 14:59:45 +00:00
|
|
|
def run_job(job_config, teuth_bin_path):
    """
    Run one job by invoking the 'teuthology' executable in teuth_bin_path.

    job_config is modified in place (a legacy nested 'config' dict is merged
    into it), dumped as YAML to a temporary file, and that file is passed to
    teuthology as its final argument.  The child's stderr is relayed line by
    line to the '<module>.child' logger, and the exit status is logged once
    the child finishes.
    """
    cmd = [os.path.join(teuth_bin_path, 'teuthology')]

    # The following is for compatibility with older schedulers, from before we
    # started merging the contents of job_config['config'] into job_config
    # itself.
    if 'config' in job_config:
        legacy_config = job_config.pop('config')
        if isinstance(legacy_config, dict):
            job_config.update(legacy_config)
        else:
            log.debug("run_job: job_config['config'] isn't a dict, it's a %s",
                      str(type(legacy_config)))

    if job_config['verbose']:
        cmd.append('-v')

    cmd += [
        '--lock',
        '--block',
        '--owner', job_config['owner'],
        '--archive', job_config['archive_path'],
        '--name', job_config['name'],
    ]
    description = job_config['description']
    if description is not None:
        cmd += ['--description', description]
    cmd.append('--')

    # The temporary file only exists inside this block, so the child has to
    # be started and waited for before leaving it.
    with tempfile.NamedTemporaryFile(prefix='teuthology-worker.',
                                     suffix='.tmp',) as config_file:
        yaml.safe_dump(data=job_config, stream=config_file)
        config_file.flush()
        cmd.append(config_file.name)

        child_log = logging.getLogger(__name__ + '.child')
        proc = subprocess.Popen(
            args=cmd,
            close_fds=True,
            stderr=subprocess.PIPE,
        )
        for stderr_line in proc.stderr:
            child_log.error(': %s', stderr_line.rstrip('\n'))
        exit_code = proc.wait()

        if exit_code == 0:
            log.info('Success!')
        else:
            log.error('Child exited with code %d', exit_code)
|