# Mirror of https://github.com/ceph/ceph
# (synced 2025-01-12 06:00:46 +00:00; 511 lines, 16 KiB, Python)
import argparse
|
|
import copy
|
|
import errno
|
|
import itertools
|
|
import logging
|
|
import os
|
|
import re
|
|
|
|
# This file is responsible for submitting tests into the queue
# by generating combinations of facets found in
# https://github.com/ceph/ceph-qa-suite.git
|
|
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import yaml
|
|
|
|
from teuthology import misc as teuthology
|
|
from teuthology import safepath
|
|
from teuthology import lock as lock
|
|
from teuthology.config import config
|
|
|
|
# Module-level logger used by the functions in this file.
log = logging.getLogger(__name__)
|
|
|
|
def main():
    """
    Schedule every job of a suite by invoking ``teuthology-schedule``.

    Parses the command line, checks that every collection is a directory,
    then for each collection builds the cartesian product of the ``*.yaml``
    snippets found in its facet sub-directories.  Each combination that is
    not filtered out (by ``exclude_arch``, ``exclude_os_type`` or the
    non-ubuntu-on-baremetal rule) is passed to ``teuthology-schedule``
    together with the config files given on the command line.  Finally a
    ``--last-in-suite`` job is scheduled, carrying the optional ``--email``
    and ``--timeout`` values.

    Exits with status 1 if a collection is not a directory.  A failing
    ``teuthology-schedule`` invocation raises
    ``subprocess.CalledProcessError``.
    """
    parser = argparse.ArgumentParser(description="""
Run a suite of ceph integration tests.

A suite is a set of collections.

A collection is a directory containing facets.

A facet is a directory containing config snippets.

Running a collection means running teuthology for every configuration
combination generated by taking one config snippet from each facet.

Any config files passed on the command line will be used for every
combination, and will override anything in the suite.
""")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=None,
        help='be more verbose',
    )
    parser.add_argument(
        '--name',
        help='name for this suite',
        required=True,
    )
    parser.add_argument(
        '--collections',
        metavar='DIR',
        nargs='+',
        required=True,
        help='the collections to run',
    )
    parser.add_argument(
        '--owner',
        help='job owner',
    )
    parser.add_argument(
        '--email',
        help='address to email test failures to',
    )
    parser.add_argument(
        '--timeout',
        help='how many seconds to wait for jobs to finish before emailing results',
    )
    parser.add_argument(
        '-n', '--num',
        default=1,
        type=int,
        help='number of times to run/queue each job'
    )
    parser.add_argument(
        '-w', '--worker',
        default='plana',
        help='which worker to use (type of machine)',
    )
    parser.add_argument(
        'config',
        metavar='CONFFILE',
        nargs='*',
        default=[],
        help='config file to read',
    )

    args = parser.parse_args()

    loglevel = logging.INFO
    if args.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
    )

    # Arguments shared by every teuthology-schedule invocation below.
    base_arg = [
        os.path.join(os.path.dirname(sys.argv[0]), 'teuthology-schedule'),
        '--name', args.name,
        '--num', str(args.num),
        '--worker', args.worker,
    ]
    if args.verbose:
        base_arg.append('-v')
    if args.owner:
        base_arg.extend(['--owner', args.owner])

    for collection in args.collections:
        if not os.path.isdir(collection):
            sys.stderr.write(
                'Collection %s is not a directory\n' % collection
            )
            sys.exit(1)

    collections = [
        (collection,
         os.path.basename(safepath.munge(collection)))
        for collection in args.collections
    ]

    # Both values depend only on the command-line config files, so they are
    # computed once instead of once per collection.
    arch = get_arch(args.config)
    machine_type = get_machine_type(args.config)

    for collection, collection_name in sorted(collections):
        log.info('Collection %s in %s', collection_name, collection)
        facets = [
            f for f in sorted(os.listdir(collection))
            if not f.startswith('.')
            and os.path.isdir(os.path.join(collection, f))
        ]
        # One list of (facet, snippet name, snippet path) per facet.
        facet_configs = (
            [(f, name, os.path.join(collection, f, name))
             for name in sorted(os.listdir(os.path.join(collection, f)))
             if not name.startswith('.')
             and name.endswith('.yaml')
             ]
            for f in facets
        )

        for configs in itertools.product(*facet_configs):
            description = 'collection:%s ' % (collection_name)
            description += ' '.join(
                '{facet}:{name}'.format(facet=facet, name=name)
                for facet, name, path in configs
            )
            os_type = get_os_type(configs)
            exclude_arch = get_exclude_arch(configs)
            exclude_os_type = get_exclude_os_type(configs)
            if exclude_arch and exclude_arch == arch:
                log.info(
                    'Skipping due to excluded_arch: %s facets %s', exclude_arch, description
                )
                continue
            if exclude_os_type and exclude_os_type == os_type:
                log.info(
                    'Skipping due to excluded_os_type: %s facets %s', exclude_os_type, description
                )
                continue
            # We should not run multiple tests (changing distros) unless the machine is a VPS
            # Re-imaging baremetal is not yet supported.
            if machine_type != 'vps' and os_type and os_type != 'ubuntu':
                log.info(
                    'Skipping due to non-ubuntu on baremetal facets %s', description
                )
                continue

            log.info(
                'Running teuthology-schedule with facets %s', description
            )

            arg = list(base_arg)
            arg.extend([
                '--description', description,
                '--',
            ])
            arg.extend(args.config)
            arg.extend(path for facet, name, path in configs)
            subprocess.check_call(
                args=arg,
            )

    # Scheduled after all regular jobs; carries the reporting options.
    arg = list(base_arg)
    arg.append('--last-in-suite')
    if args.email:
        arg.extend(['--email', args.email])
    if args.timeout:
        arg.extend(['--timeout', args.timeout])
    subprocess.check_call(
        args=arg,
    )
|
|
|
|
def _describe_unfinished_job(archive_dir, job):
    """
    Build the ``ls`` output line for a job that has no summary.yaml.

    The line holds the job name, either ``(pid N)`` -- when the job's
    ``pid`` file names a process under /proc whose command line mentions
    *archive_dir* -- or ``(no process or summary.yaml)``, and the last
    line of the job's teuthology.log as printed by ``tail``.
    """
    job_dir = os.path.join(archive_dir, job)
    status = '(no process or summary.yaml)'
    pidfile = os.path.join(job_dir, 'pid')
    try:
        if os.path.isfile(pidfile):
            with open(pidfile, 'r') as f:
                # Strip so a trailing newline does not break the /proc lookup.
                pid = f.read().strip()
            if pid and os.path.isdir('/proc/%s' % pid):
                with open('/proc/%s/cmdline' % pid, 'r') as f:
                    cmdline = f.read()
                if archive_dir in cmdline:
                    status = '(pid %s)' % pid
    except IOError:
        # The pid file or the process went away while we were looking at
        # it; report the job as having no process.
        pass

    try:
        # Argument list rather than a shell string, so that unusual
        # characters in the path are passed through untouched.
        proc = subprocess.Popen(
            ['tail', '-n', '1', os.path.join(job_dir, 'teuthology.log')],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        tail = proc.communicate()[0].rstrip()
    except OSError:
        # ``tail`` could not be started; show the job without a log line.
        tail = ''

    # Two spaces after the job name match the previous output layout.
    return ('%s  %s %s' % (job, status, tail)).rstrip()


def ls():
    """
    Print one line per job found under ``--archive-dir``.

    Jobs with a readable summary.yaml are shown as
    ``<job> <pass|FAIL> <owner> <description> <duration>s``; with
    ``--verbose`` the failure reason, if recorded, follows on its own line.
    Jobs without a summary.yaml are shown with their process status and
    the last line of their log instead.

    Raises ``IOError`` if summary.yaml cannot be read for a reason other
    than not existing.
    """
    parser = argparse.ArgumentParser(description='List teuthology job results')
    parser.add_argument(
        '--archive-dir',
        metavar='DIR',
        help='path under which to archive results',
        required=True,
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=False,
        help='show reasons tests failed',
    )
    args = parser.parse_args()

    for j in get_jobs(args.archive_dir):
        job_dir = os.path.join(args.archive_dir, j)
        summary = {}
        try:
            with open(os.path.join(job_dir, 'summary.yaml')) as f:
                for new in yaml.safe_load_all(f):
                    # An empty YAML document loads as None; skip it.
                    if new:
                        summary.update(new)
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            print(_describe_unfinished_job(args.archive_dir, j))
            continue

        print("{job} {success} {owner} {desc} {duration}s".format(
            job=j,
            owner=summary.get('owner', '-'),
            desc=summary.get('description', '-'),
            success='pass' if summary.get('success', False) else 'FAIL',
            duration=int(summary.get('duration', 0)),
        ))
        if args.verbose and 'failure_reason' in summary:
            print(' {reason}'.format(reason=summary['failure_reason']))
|
|
|
|
def generate_coverage(args):
    """
    Start ``teuthology-coverage`` for the suite described by *args*.

    Reads ``coverage_output_dir``, ``coverage_html_dir`` and
    ``coverage_tools_dir`` from ``args.teuthology_config`` and uses
    ``args.name`` and ``args.archive_dir``.  The process is launched with
    ``subprocess.Popen`` and is not waited for.
    """
    log.info('starting coverage generation')
    settings = args.teuthology_config
    # The tool is looked up next to the script that is currently running.
    coverage_tool = os.path.join(
        os.path.dirname(sys.argv[0]), 'teuthology-coverage')
    command = [coverage_tool, '-v']
    command += ['-o', os.path.join(settings['coverage_output_dir'], args.name)]
    command += [
        '--html-output',
        os.path.join(settings['coverage_html_dir'], args.name),
    ]
    command += ['--cov-tools-dir', settings['coverage_tools_dir']]
    command.append(args.archive_dir)
    subprocess.Popen(args=command)
|
|
|
|
def email_results(subject, from_, to, body):
    """
    Send a plain-text email through the SMTP server on ``localhost``.

    :param subject: value of the Subject header
    :param from_:   sender address (also used as the envelope sender)
    :param to:      single recipient address
    :param body:    message text

    Errors raised by ``smtplib`` propagate to the caller; the connection
    is closed either way.
    """
    log.info('Sending results to {to}: {body}'.format(to=to, body=body))
    import smtplib
    from email.mime.text import MIMEText
    msg = MIMEText(body)
    msg['Subject'] = subject
    msg['From'] = from_
    msg['To'] = to
    message_text = msg.as_string()
    log.debug('sending email %s', message_text)
    smtp = smtplib.SMTP('localhost')
    try:
        smtp.sendmail(msg['From'], [msg['To']], message_text)
        smtp.quit()
    finally:
        # Release the socket even when sendmail() or quit() raised; after
        # a successful quit() there is nothing left to close.
        smtp.close()
|
|
|
|
def results():
    """
    Command-line entry point that reports the results of a suite.

    Parses the arguments, configures logging (console plus a
    ``results.log`` file inside ``--archive-dir``), lets
    ``teuthology.read_config`` process the parsed arguments and then hands
    over to ``_results``.  Any error raised there is logged with its
    traceback and re-raised.
    """
    parser = argparse.ArgumentParser(
        description='Email teuthology suite results')
    parser.add_argument('--email', help='address to email test failures to')
    parser.add_argument(
        '--timeout',
        help='how many seconds to wait for all tests to finish (default no wait)',
        type=int,
        default=0,
    )
    parser.add_argument(
        '--archive-dir',
        metavar='DIR',
        help='path under which results for the suite are stored',
        required=True,
    )
    parser.add_argument('--name', help='name of the suite', required=True)
    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=False,
        help='be more verbose',
    )
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO)

    teuthology.read_config(args)

    # Mirror everything logged from here on into <archive-dir>/results.log.
    results_log = logging.FileHandler(
        filename=os.path.join(args.archive_dir, 'results.log'))
    results_log.setFormatter(logging.Formatter(
        fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    ))
    logging.getLogger().addHandler(results_log)

    try:
        _results(args)
    except:
        # Bare except: whatever was raised is logged, then propagated.
        log.exception('error generating results')
        raise
|
|
|
|
|
|
def _results(args):
    """
    Wait for the suite's jobs to finish, then report on them.

    A job counts as running while its directory under ``args.archive_dir``
    has no summary.yaml.  If ``args.timeout`` is positive, the running
    jobs are polled (sleeping 10 seconds between checks) until all have
    finished or the timeout has elapsed.  Afterwards the email body is
    built and, when ``args.email`` is set, sent.  Coverage generation is
    started even if sending the email fails.
    """
    running_tests = [
        f for f in sorted(os.listdir(args.archive_dir))
        if not f.startswith('.')
        and os.path.isdir(os.path.join(args.archive_dir, f))
        and not os.path.exists(os.path.join(args.archive_dir, f, 'summary.yaml'))
    ]
    starttime = time.time()
    log.info('Waiting up to %d seconds for tests to finish...', args.timeout)
    while running_tests and args.timeout > 0:
        # Only the last entry is checked; it is dropped once it finished,
        # so the list drains from the end.
        last_summary = os.path.join(
            args.archive_dir, running_tests[-1], 'summary.yaml')
        if os.path.exists(last_summary):
            running_tests.pop()
        else:
            if time.time() - starttime > args.timeout:
                log.warning('test(s) did not finish before timeout of %d seconds',
                            args.timeout)
                break
            time.sleep(10)
    if running_tests:
        # Timed out, or no waiting was requested: do not claim completion.
        log.info('%d test(s) still unfinished; gathering results...',
                 len(running_tests))
    else:
        log.info('Tests finished! gathering results...')

    (subject, body) = build_email_body(args.name, args.archive_dir,
                                       args.timeout)

    try:
        if args.email:
            email_results(
                subject=subject,
                from_=args.teuthology_config['results_sending_email'],
                to=args.email,
                body=body,
            )
    finally:
        generate_coverage(args)
|
|
|
|
|
|
def get_http_log_path(archive_dir, job_id):
    """
    Return the location of a job's logs below ``config.archive_server``.

    The result is ``<archive_server>/<last component of archive_dir>/<job_id>``.
    Returns None when ``config.archive_server`` is not set.

    :param archive_dir: directory holding the run's job directories
    :param job_id:      job identifier; converted with ``str()``
    """
    http_base = config.archive_server
    if not http_base:
        return None
    # Drop trailing separators first: os.path.split('/a/run/') would yield
    # an empty last component and lose the run directory from the result.
    archive_subdir = os.path.basename(archive_dir.rstrip(os.sep))
    return os.path.join(http_base, archive_subdir, str(job_id))
|
|
|
|
|
|
def get_jobs(archive_dir):
    """
    Return the names of the job directories found in *archive_dir*.

    A job directory is a sub-directory whose name consists of digits only.
    The names are returned as strings in sorted (lexicographic) order, so
    ``'10'`` comes before ``'2'``.

    :param archive_dir: directory to scan; ``os.listdir`` errors propagate
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    job_name = re.compile(r'\d+$')
    return sorted(
        entry for entry in os.listdir(archive_dir)
        if job_name.match(entry)
        and os.path.isdir(os.path.join(archive_dir, entry))
    )
|
|
|
|
|
|
def build_email_body(name, archive_dir, timeout):
    """
    Build the subject and body of the results email for a suite.

    Every job directory under *archive_dir* is classified as unfinished
    (no summary.yaml), passed, or failed.  Failed jobs are listed with
    their failure reason, log location and sentry events when available.

    :param name:        suite name, used in the subject
    :param archive_dir: directory holding the job directories
    :param timeout:     number of seconds quoted in the "may be hung" text
    :returns:           ``(subject, body)``, both stripped of surrounding
                        whitespace
    """
    failed = []
    unfinished = []
    passed = []

    for job in get_jobs(archive_dir):
        job_dir = os.path.join(archive_dir, job)
        summary_file = os.path.join(job_dir, 'summary.yaml')

        # Unfinished jobs will have no summary.yaml
        if not os.path.exists(summary_file):
            unfinished.append(job)
            continue

        with open(summary_file) as f:
            # An empty file loads as None; treat it as a summary without keys.
            summary = yaml.safe_load(f) or {}
        # Missing keys fall back to the same defaults ls() uses, so one
        # incomplete summary cannot abort the whole report.
        long_desc = '{test}: ({duration}s) {desc}'.format(
            duration=int(summary.get('duration', 0)),
            desc=summary.get('description', '-'),
            test=job,
        )
        if summary.get('success', False):
            passed.append(long_desc)
            continue

        full_desc = long_desc
        if 'failure_reason' in summary:
            full_desc += '\n %s' % summary['failure_reason']
        http_log = get_http_log_path(archive_dir, job)
        if http_log:
            full_desc += '\n %s' % http_log
        sentry_events = summary.get('sentry_events')
        if sentry_events:
            full_desc += '\n %s' % '\n '.join(sentry_events)
        failed.append(full_desc)

    def maybe_comma(more):
        # Separator after a subject part: a comma only if more parts follow.
        return ', ' if more else ' '

    subject = ''
    body = ''
    if failed:
        subject += '{num_failed} failed{sep}'.format(
            num_failed=len(failed),
            sep=maybe_comma(unfinished or passed)
        )
        body += 'The following tests failed:\n%s\n\n\n' % '\n'.join(failed)
    if unfinished:
        subject += '{num_hung} hung{sep}'.format(
            num_hung=len(unfinished),
            sep=maybe_comma(passed)
        )
        body += 'These tests may be hung (did not finish in {timeout} seconds after the last test in the suite):\n{hung_jobs}\n\n\n'.format(
            timeout=timeout,
            hung_jobs='\n'.join(unfinished),
        )
    if passed:
        subject += '%s passed ' % len(passed)
        body += 'These tests passed:\n%s' % '\n'.join(passed)
    subject += 'in {suite}'.format(suite=name)
    return (subject.strip(), body.strip())
|
|
|
|
|
|
def get_arch(config):
    """
    Return the ``arch`` of a machine whose type matches the config files.

    For each YAML file in *config* that sets ``machine_type``, the entries
    returned by ``lock.list_locks`` are searched for one whose ``type``
    equals it; the ``arch`` of the first match is returned.  Returns None
    when no file sets ``machine_type`` or no entry matches.

    :param config: iterable of YAML file paths
    """
    for yamlfile in config:
        with open(yamlfile) as f:
            # An empty file loads as None; treat it as having no keys.
            y = yaml.safe_load(f) or {}
        machine_type = y.get('machine_type')
        if not machine_type:
            continue
        # list_locks() is handed an empty list in place of a context.
        fakectx = []
        for machine in lock.list_locks(fakectx):
            if machine['type'] == machine_type:
                return machine['arch']
    return None
|
|
|
|
|
|
def get_os_type(configs):
    """
    Return the first truthy ``os_type`` found in the given facet configs.

    :param configs: iterable of sequences whose third item (index 2) is the
                    path of a YAML file
    :returns:       the ``os_type`` value, or None if no file sets one
    """
    for config in configs:
        yamlfile = config[2]
        with open(yamlfile) as f:
            # An empty file loads as None; treat it as having no keys.
            y = yaml.safe_load(f) or {}
        os_type = y.get('os_type')
        if os_type:
            return os_type
    return None
|
|
|
|
|
|
def get_exclude_arch(configs):
    """
    Return the first truthy ``exclude_arch`` found in the given facet configs.

    :param configs: iterable of sequences whose third item (index 2) is the
                    path of a YAML file
    :returns:       the ``exclude_arch`` value, or None if no file sets one
    """
    for config in configs:
        yamlfile = config[2]
        with open(yamlfile) as f:
            # An empty file loads as None; treat it as having no keys.
            y = yaml.safe_load(f) or {}
        exclude_arch = y.get('exclude_arch')
        if exclude_arch:
            return exclude_arch
    return None
|
|
|
|
|
|
def get_exclude_os_type(configs):
    """
    Return the first truthy ``exclude_os_type`` found in the facet configs.

    :param configs: iterable of sequences whose third item (index 2) is the
                    path of a YAML file
    :returns:       the ``exclude_os_type`` value, or None if no file sets
                    one
    """
    for config in configs:
        yamlfile = config[2]
        with open(yamlfile) as f:
            # An empty file loads as None; treat it as having no keys.
            y = yaml.safe_load(f) or {}
        exclude_os_type = y.get('exclude_os_type')
        if exclude_os_type:
            return exclude_os_type
    return None
|
|
|
|
|
|
def get_machine_type(config):
    """
    Return the first truthy ``machine_type`` found in the given YAML files.

    :param config: iterable of YAML file paths
    :returns:      the ``machine_type`` value, or None if no file sets one
    """
    for yamlfile in config:
        with open(yamlfile) as f:
            # An empty file loads as None; treat it as having no keys.
            y = yaml.safe_load(f) or {}
        machine_type = y.get('machine_type')
        if machine_type:
            return machine_type
    return None
|
|
|