ceph/teuthology/queue.py

252 lines
8.0 KiB
Python
Raw Normal View History

import argparse
import fcntl
import logging
import os
import subprocess
2013-08-23 14:59:48 +00:00
import shutil
import sys
import tempfile
import time
import yaml
import beanstalkc
from .config import config as teuth_config
from . import safepath
log = logging.getLogger(__name__)
# simple flock class
class filelock(object):
    """
    Exclusive advisory lock backed by a lock file.

    The lock is taken with ``fcntl.lockf(..., LOCK_EX)`` on a file opened
    for writing at ``fn``, so ``acquire()`` blocks until the lock is
    available.  Instances may be re-acquired after ``release()`` and can
    also be used as context managers.

    :param fn: path of the lock file; it is created (or truncated) on
               every ``acquire()``
    """

    def __init__(self, fn):
        self.fn = fn
        # Open file object while the lock is held, otherwise None.
        self.fd = None

    def acquire(self):
        """Open the lock file and block until the exclusive lock is held."""
        assert not self.fd
        # open() rather than the Python-2-only file() constructor.
        fd = open(self.fn, 'w')
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except Exception:
            # Don't leak the descriptor or leave the object looking locked.
            fd.close()
            raise
        self.fd = fd

    def release(self):
        """Drop the lock and close the lock file."""
        assert self.fd
        fd, self.fd = self.fd, None
        try:
            fcntl.lockf(fd, fcntl.LOCK_UN)
        finally:
            # Close explicitly instead of relying on garbage collection.
            fd.close()

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        # Never suppress an exception raised inside the with-block.
        return False
def connect(ctx):
    """
    Open a beanstalkc connection to the configured job queue.

    :param ctx: object whose ``teuthology_config`` mapping provides the
                ``queue_host`` and ``queue_port`` entries
    :returns: a new ``beanstalkc.Connection``
    """
    settings = ctx.teuthology_config
    return beanstalkc.Connection(
        host=settings['queue_host'],
        port=settings['queue_port'],
    )
def fetch_teuthology_branch(path, branch='master'):
    """
    Make sure we have the correct teuthology branch checked out and up-to-date

    The checkout at ``path`` is cloned if it does not exist, otherwise
    fetched (at most once per minute), then hard-reset to
    ``origin/<branch>`` and bootstrapped by running ``./bootstrap`` with
    ``NO_CLOBBER=1`` in the environment.

    :param path:   directory of the checkout; ``<path>.lock`` is used as a
                   lock file so only one worker touches it at a time
    :param branch: name of the upstream branch to check out
    :raises subprocess.CalledProcessError: if any of the git commands or
            the bootstrap script exits non-zero.  When the reset to
            ``origin/<branch>`` fails, the checkout is removed before the
            error is re-raised.
    """
    # only let one worker create/update the checkout at a time
    lock = filelock('%s.lock' % path)
    lock.acquire()
    try:
        if not os.path.isdir(path):
            log.info("Cloning %s from upstream", branch)
            teuthology_git_upstream = teuth_config.ceph_git_base_url + \
                'teuthology.git'
            log.info(
                subprocess.check_output(('git', 'clone', '--branch', branch,
                                         teuthology_git_upstream, path),
                                        cwd=os.path.dirname(path))
            )
        # Throttle on the checkout's own mtime: it is refreshed by the
        # 'touch' below after every fetch.
        elif time.time() - os.stat(path).st_mtime > 60:
            # only do this at most once per minute
            log.info("Fetching %s from upstream", branch)
            log.info(
                subprocess.check_output(('git', 'fetch', '-p', 'origin'),
                                        cwd=path)
            )
            log.info(
                subprocess.check_output(('touch', path))
            )
        else:
            log.info("%s was just updated; assuming it is current", branch)

        # This try/except block will notice if the requested branch doesn't
        # exist, whether it was cloned or fetched.
        try:
            subprocess.check_output(
                ('git', 'reset', '--hard', 'origin/%s' % branch),
                cwd=path,
            )
        except subprocess.CalledProcessError:
            log.exception("teuthology branch not found: %s", branch)
            shutil.rmtree(path)
            raise

        log.info("Bootstrapping %s", path)
        # This magic makes the bootstrap script not attempt to clobber an
        # existing virtualenv. But the branch's bootstrap needs to actually
        # check for the NO_CLOBBER variable.
        env = os.environ.copy()
        env['NO_CLOBBER'] = '1'
        log.info(
            subprocess.check_output(('./bootstrap',), cwd=path, env=env)
        )
    finally:
        lock.release()
def worker():
    """
    Command-line entry point: pull jobs off a beanstalk tube and run them.

    Parses the command line, configures logging to a per-tube, per-pid file
    under ``--log-dir``, connects to the queue and then loops forever.  Each
    reserved job is buried immediately, its YAML body is loaded, the
    requested teuthology branch is checked out and bootstrapped, and then
    either ``teuthology-results`` is spawned (for a ``last_in_suite`` job)
    or the job is run via ``run_job``.  The job is deleted from the queue
    afterwards.

    Exits through ``sys.exit`` if the archive directory does not exist and
    raises ``RuntimeError`` if the checkout has no ``virtualenv/bin``
    directory after bootstrapping.
    """
    parser = argparse.ArgumentParser(description="""
Grab jobs from a beanstalk queue and run the teuthology tests they
describe. One job is run at a time.
""")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=None,
        help='be more verbose',
    )
    parser.add_argument(
        '--archive-dir',
        metavar='DIR',
        help='path under which to archive results',
        required=True,
    )
    parser.add_argument(
        '-l', '--log-dir',
        help='path in which to store logs',
        required=True,
    )
    parser.add_argument(
        '-t', '--tube',
        help='which beanstalk tube to read jobs from',
        required=True,
    )

    ctx = parser.parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        filename=os.path.join(ctx.log_dir, 'worker.{tube}.{pid}'.format(
            pid=os.getpid(),
            tube=ctx.tube,
        )),
        format='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    if not os.path.isdir(ctx.archive_dir):
        sys.exit("{prog}: archive directory must exist: {path}".format(
            prog=os.path.basename(sys.argv[0]),
            path=ctx.archive_dir,
        ))

    from teuthology.misc import read_config
    read_config(ctx)

    beanstalk = connect(ctx)
    beanstalk.watch(ctx.tube)
    beanstalk.ignore('default')

    while True:
        job = beanstalk.reserve(timeout=60)
        if job is None:
            # reserve() timed out with nothing to do; poll again
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        log.debug('Reserved job %d', job.jid)
        log.debug('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)

        job_config['job_id'] = job.jid
        safe_archive = safepath.munge(job_config['name'])
        archive_path_full = os.path.join(
            ctx.archive_dir, safe_archive, str(job.jid))
        job_config['archive_path'] = archive_path_full

        # If the teuthology branch was not specified, default to master and
        # store that value.
        teuthology_branch = job_config.get('teuthology_branch', 'master')
        job_config['teuthology_branch'] = teuthology_branch

        # expanduser() honours $HOME when it is set and falls back to the
        # password database otherwise, instead of handing None to join().
        teuth_path = os.path.join(os.path.expanduser('~'),
                                  'teuthology-' + teuthology_branch)

        fetch_teuthology_branch(path=teuth_path, branch=teuthology_branch)

        teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
        if not os.path.isdir(teuth_bin_path):
            raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                               (teuthology_branch, teuth_bin_path))

        if job_config.get('last_in_suite'):
            log.debug('Generating coverage for %s', job_config['name'])
            args = [
                os.path.join(teuth_bin_path, 'teuthology-results'),
                '--timeout',
                str(job_config.get('results_timeout', 21600)),
            ]
            # The address is optional: only pass --email when one was
            # actually supplied, rather than failing on a missing or
            # None value after the job has already been buried.
            if job_config.get('email'):
                args.extend(['--email', job_config['email']])
            args.extend([
                '--archive-dir',
                os.path.join(ctx.archive_dir, safe_archive),
                '--name',
                job_config['name'],
            ])
            # Started in the background; the worker does not wait for it.
            subprocess.Popen(args=args)
        else:
            log.debug('Creating archive dir...')
            safepath.makedirs(ctx.archive_dir, safe_archive)
            log.info('Running job %d', job.jid)
            run_job(job_config, teuth_bin_path)
        job.delete()
def run_job(job_config, teuth_bin_path):
    """
    Run one job by invoking the ``teuthology`` executable and wait for it.

    The (possibly flattened) job configuration is dumped to a temporary
    YAML file that is passed to the child as its final argument.  Every
    line the child writes to stderr is logged at error level on the
    ``<this module>.child`` logger, and the exit status is logged once the
    child finishes.

    :param job_config:     dict describing the job.  ``owner``,
                           ``archive_path`` and ``name`` are required;
                           ``verbose`` and ``description`` are optional.
                           The dict is modified in place when it carries a
                           legacy ``config`` entry.
    :param teuth_bin_path: directory containing the ``teuthology``
                           executable
    :raises KeyError: if one of the required keys is missing
    """
    arg = [
        os.path.join(teuth_bin_path, 'teuthology'),
    ]
    # The following is for compatibility with older schedulers, from before we
    # started merging the contents of job_config['config'] into job_config
    # itself.
    if 'config' in job_config:
        inner_config = job_config.pop('config')
        if not isinstance(inner_config, dict):
            log.debug("run_job: job_config['config'] isn't a dict, it's a %s",
                      str(type(inner_config)))
        else:
            job_config.update(inner_config)

    # 'verbose' and 'description' are optional; treat a missing key like an
    # unset value instead of raising KeyError.
    if job_config.get('verbose'):
        arg.append('-v')

    arg.extend([
        '--lock',
        '--block',
        '--owner', job_config['owner'],
        '--archive', job_config['archive_path'],
        '--name', job_config['name'],
    ])
    if job_config.get('description') is not None:
        arg.extend(['--description', job_config['description']])
    arg.append('--')

    # The temporary file has to outlive the child, so everything up to
    # p.wait() stays inside the with-block.
    with tempfile.NamedTemporaryFile(prefix='teuthology-worker.',
                                     suffix='.tmp',) as tmp:
        yaml.safe_dump(data=job_config, stream=tmp)
        tmp.flush()
        arg.append(tmp.name)
        p = subprocess.Popen(
            args=arg,
            close_fds=True,
            stderr=subprocess.PIPE,
        )
        child = logging.getLogger(__name__ + '.child')
        for line in p.stderr:
            child.error(': %s', line.rstrip('\n'))
        p.wait()
        if p.returncode != 0:
            log.error('Child exited with code %d', p.returncode)
        else:
            log.info('Success!')