mirror of
https://github.com/ceph/ceph
synced 2024-12-28 06:23:08 +00:00
f4321b0519
Signed-off-by: Zack Cerza <zack.cerza@inktank.com>
599 lines
19 KiB
Python
599 lines
19 KiB
Python
# This file is responsible for submitting tests into the queue
# by generating combinations of facets found in
# https://github.com/ceph/ceph-qa-suite.git
|
|
|
|
import copy
|
|
from datetime import datetime
|
|
import itertools
|
|
import logging
|
|
import os
|
|
import requests
|
|
import pwd
|
|
import subprocess
|
|
import smtplib
|
|
import sys
|
|
import yaml
|
|
from email.mime.text import MIMEText
|
|
from tempfile import NamedTemporaryFile
|
|
|
|
import teuthology
|
|
from teuthology import lock as lock
|
|
from teuthology.config import config
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def main(args):
    """
    Schedule a suite run based on parsed command-line arguments.

    Reads the options out of ``args``, builds the run name with
    make_run_name() and the initial yaml config with create_initial_config(),
    writes that config to a temporary file, and passes everything on to
    prepare_and_schedule(). The temporary file is removed afterwards, even
    if scheduling fails.

    :param args: A mapping of option names (e.g. '--suite', '--ceph',
                 '<config_yaml>') to their values.
                 NOTE(review): presumably produced by docopt -- confirm
                 against the calling script.
    :raises ScheduleFailError: via schedule_fail() when the --base directory
                               does not exist.
    """
    verbose = args['--verbose']
    if verbose:
        teuthology.log.setLevel(logging.DEBUG)
    dry_run = args['--dry-run']

    # Work on a copy so that prepending the generated config below does not
    # modify the list owned by the caller.
    base_yaml_paths = list(args['<config_yaml>'])
    base = os.path.expanduser(args['--base'])
    if not os.path.exists(base):
        schedule_fail("Base directory not found: {dir}".format(dir=base))
    suite = args['--suite']
    nice_suite = suite.replace('/', ':')
    ceph_branch = args['--ceph']
    kernel_branch = args['--kernel']
    kernel_flavor = args['--flavor']
    teuthology_branch = args['--teuthology-branch']
    machine_type = args['--machine-type']
    distro = args['--distro']

    limit = int(args['--limit'])
    priority = int(args['--priority'])
    num = int(args['--num'])
    owner = args['--owner']
    email = args['--email']
    if email:
        config.email_specified = True
        config.results_email = email
    timeout = args['--timeout']

    name = make_run_name(nice_suite, ceph_branch, kernel_branch, kernel_flavor,
                         machine_type)
    config_string = create_initial_config(nice_suite, ceph_branch,
                                          teuthology_branch, kernel_branch,
                                          kernel_flavor, distro, machine_type)

    # delete=False because the file has to outlive the handle: it is read
    # later through its path. We remove it ourselves in the finally clause.
    base_yaml = NamedTemporaryFile(prefix='schedule_suite_', delete=False)
    base_yaml_path = base_yaml.name
    try:
        with base_yaml:
            base_yaml.write(config_string)
        base_yaml_paths.insert(0, base_yaml_path)
        prepare_and_schedule(owner=owner,
                             name=name,
                             suite=suite,
                             machine_type=machine_type,
                             base=base,
                             base_yaml_paths=base_yaml_paths,
                             email=email,
                             priority=priority,
                             limit=limit,
                             num=num,
                             timeout=timeout,
                             dry_run=dry_run,
                             verbose=verbose,
                             )
    finally:
        # Clean up even when writing or scheduling raised
        os.remove(base_yaml_path)
|
|
|
|
|
|
def make_run_name(suite, ceph_branch, kernel_branch, kernel_flavor,
                  machine_type, user=None, timestamp=None):
    """
    Generate a run name based on the parameters. A run name looks like:
        teuthology-2014-06-23_19:00:37-rados-dumpling-testing-basic-plana

    :param user:      Name to prefix the run with. Defaults to the login name
                      of the current uid.
    :param timestamp: Either a datetime-like object (anything providing
                      strftime()) or an already formatted string. Defaults to
                      the current local time.
    :returns: The run name, a '-'-separated string
    """
    if not user:
        user = pwd.getpwuid(os.getuid()).pw_name
    if not timestamp:
        timestamp = datetime.now()
    # Format datetime objects explicitly; str(datetime) would produce
    # 'YYYY-MM-DD HH:MM:SS' (with a space), which does not match the run name
    # format shown above. Strings are passed through untouched.
    if hasattr(timestamp, 'strftime'):
        timestamp = timestamp.strftime('%Y-%m-%d_%H:%M:%S')

    worker = get_worker(machine_type)
    return '-'.join(
        [user, str(timestamp), suite, ceph_branch,
         kernel_branch, kernel_flavor, worker]
    )
|
|
|
|
|
|
def create_initial_config(nice_suite, ceph_branch, teuthology_branch,
                          kernel_branch, kernel_flavor, distro, machine_type):
    """
    Put together the config file used as the basis for each job in the run.
    Grabs hashes for the latest ceph and kernel versions in the branches
    specified and specifies them so we know exactly what we're testing.
    Also picks the s3-tests and (unless one was given) teuthology branches:
    the ceph branch name is used if a branch of that name exists in the
    respective repository, otherwise 'master'.

    :param kernel_branch: A kernel branch name; 'distro' to use the literal
                          value 'distro' as the kernel sha1, or '-' to omit
                          the kernel stanza entirely.
    :returns: A yaml-formatted string
    :raises ScheduleFailError: via schedule_fail() when the kernel branch,
                               the ceph branch, or the ceph packages cannot
                               be found.
    """
    # Put together a stanza specifying the kernel hash
    if kernel_branch == 'distro':
        kernel_hash = 'distro'
    # Skip the stanza if the branch passed is '-'
    elif kernel_branch == '-':
        kernel_hash = None
    else:
        # Keyword arguments on purpose: get_hash() takes distro *before*
        # machine_type, so passing machine_type positionally would land in
        # the distro slot.
        kernel_hash = get_hash('kernel', kernel_branch, kernel_flavor,
                               distro=distro, machine_type=machine_type)
        if not kernel_hash:
            schedule_fail(message="Kernel branch '{branch}' not found".format(
                branch=kernel_branch))
    if kernel_hash:
        log.info("kernel sha1: {hash}".format(hash=kernel_hash))
        kernel_dict = dict(kernel=dict(kdb=True, sha1=kernel_hash))
        kernel_stanza = yaml.dump(kernel_dict,
                                  default_flow_style=False).strip()
    else:
        kernel_stanza = ''

    # Get the ceph hash
    ceph_hash = get_hash('ceph', ceph_branch, kernel_flavor,
                         distro=distro, machine_type=machine_type)
    if not ceph_hash:
        schedule_fail("Ceph branch '{branch}' not found".format(
            branch=ceph_branch))
    log.info("ceph sha1: {hash}".format(hash=ceph_hash))

    # Get the ceph package version
    ceph_version = package_version_for_hash(ceph_hash, kernel_flavor,
                                            distro=distro,
                                            machine_type=machine_type)
    if not ceph_version:
        # ceph_version is empty here, so report the hash we looked up
        schedule_fail("Packages for ceph version '{ver}' not found".format(
            ver=ceph_hash))
    log.info("ceph version: {ver}".format(ver=ceph_version))

    # Decide what branch of s3-tests to use
    if get_branch_info('s3-tests', ceph_branch):
        s3_branch = ceph_branch
    else:
        log.info("branch {0} not in s3-tests.git; will use master for "
                 "s3-tests".format(ceph_branch))
        s3_branch = 'master'
    log.info("s3-tests branch: %s", s3_branch)

    if not teuthology_branch:
        # Decide what branch of teuthology to use
        if get_branch_info('teuthology', ceph_branch):
            teuthology_branch = ceph_branch
        else:
            log.info("branch {0} not in teuthology.git; will use master for "
                     "teuthology".format(ceph_branch))
            teuthology_branch = 'master'
    log.info("teuthology branch: %s", teuthology_branch)

    config_input = dict(
        nice_suite=nice_suite,
        ceph_branch=ceph_branch,
        ceph_hash=ceph_hash,
        teuthology_branch=teuthology_branch,
        machine_type=machine_type,
        kernel_stanza=kernel_stanza,
        distro=distro,
        s3_branch=s3_branch,
    )
    return config_template.format(**config_input)
|
|
|
|
|
|
def prepare_and_schedule(owner, name, suite, machine_type, base,
                         base_yaml_paths, email, priority, limit, num, timeout,
                         dry_run, verbose):
    """
    Build the argument list shared by every teuthology-schedule invocation
    and hand it, along with the other parameters, to schedule_suite().

    Afterwards, if an email address was given and schedule_suite() reported
    a non-zero job count, one more "--last-in-suite" job carrying the email
    address (and the timeout, if any) is scheduled -- or merely logged when
    dry_run is set.
    """
    arch = get_arch(machine_type)

    # teuthology-schedule is looked up next to the running script
    scheduler = os.path.join(os.path.dirname(sys.argv[0]),
                             'teuthology-schedule')
    base_args = [scheduler, '--name', name, '--num', str(num),
                 '--worker', get_worker(machine_type)]
    if priority:
        base_args += ['--priority', str(priority)]
    if verbose:
        base_args += ['-v']
    if owner:
        base_args += ['--owner', owner]

    num_jobs = schedule_suite(
        name=suite,
        path=os.path.join(base, suite),
        base_yamls=base_yaml_paths,
        base_args=base_args,
        arch=arch,
        machine_type=machine_type,
        limit=limit,
        dry_run=dry_run,
    )

    # Nothing more to do without a recipient or without any jobs
    if not (email and num_jobs):
        return

    last_job_args = copy.deepcopy(base_args)
    last_job_args += ['--last-in-suite', '--email', email]
    if timeout:
        last_job_args += ['--timeout', timeout]

    if dry_run:
        log.info('dry-run: %s' % ' '.join(last_job_args))
        return
    subprocess.check_call(args=last_job_args)
|
|
|
|
|
|
def schedule_fail(message, name=None):
    """
    If an email address has been specified anywhere, send an alert there. Then
    raise a ScheduleFailError.

    Sending the alert is best-effort: if the mail cannot be delivered, the
    problem is logged and the ScheduleFailError is raised regardless, so
    callers always see the exception type they expect.

    :param message: Text describing the failure; used as the mail body and as
                    the exception message.
    :param name:    Optional run name, used in the mail subject and passed on
                    to the exception.
    :raises ScheduleFailError: always
    """
    email = config.results_email
    if email:
        subject = "Failed to schedule {name}".format(name=name)
        msg = MIMEText(message)
        msg['Subject'] = subject
        msg['From'] = config.results_sending_email
        msg['To'] = email
        try:
            smtp = smtplib.SMTP('localhost')
            try:
                smtp.sendmail(msg['From'], [msg['To']], msg.as_string())
            finally:
                # Close the connection even if sendmail() raised
                smtp.quit()
        except (smtplib.SMTPException, EnvironmentError):
            # EnvironmentError covers socket-level failures such as a
            # refused connection to the local mail server.
            log.exception("Could not send scheduling failure alert to %s",
                          email)
    raise ScheduleFailError(message, name)
|
|
|
|
|
|
class ScheduleFailError(RuntimeError):
    """
    Raised when a suite run could not be scheduled.

    :param message: Text describing what went wrong
    :param name:    Optional name of the run that failed to schedule
    """
    def __init__(self, message, name=None):
        # Initialise the base class so that .args is populated (useful for
        # repr() and pickling).
        super(ScheduleFailError, self).__init__(message, name)
        self.message = message
        self.name = name

    def __str__(self):
        # Leave the name out entirely when none was given, instead of
        # rendering the literal text 'None'.
        if self.name:
            prefix = "Job scheduling {name} failed".format(name=self.name)
        else:
            prefix = "Job scheduling failed"
        return "{prefix}: '{msg}'".format(prefix=prefix, msg=self.message)
|
|
|
|
|
|
def get_worker(machine_type):
    """
    Pick the beanstalkd worker for a machine_type.

    A machine_type naming several types at once (comma-separated, e.g.
    'plana,mira') maps to the 'multi' worker; anything else is returned
    unchanged.
    """
    return 'multi' if ',' in machine_type else machine_type
|
|
|
|
|
|
def get_hash(project='ceph', branch='master', flavor='basic',
             distro='ubuntu', machine_type='plana'):
    """
    Find the hash representing the head of the project's repository via
    querying a gitbuilder repo.

    :param project:      Passed to get_gitbuilder_url(), e.g. 'ceph' or
                         'kernel'
    :param branch:       The branch whose head is looked up
    :param flavor:       Passed to get_gitbuilder_url() as the flavor
    :param distro:       Passed to get_distro_defaults()
    :param machine_type: Passed to get_distro_defaults()
    :returns: The sha1 as a string, or None when the response status is not
              OK (e.g. a 404). Exceptions raised by requests.get() itself
              are not caught here.
    """
    # Alternate method for github-hosted projects - left here for
    # informational purposes:
    #   GET https://api.github.com/repos/ceph/ceph/git/refs/heads/master
    #   and read ['object']['sha'] from the JSON response
    (arch, release, pkg_type) = get_distro_defaults(distro, machine_type)
    base_url = get_gitbuilder_url(project, release, pkg_type, arch, flavor)
    # URLs always use '/'; os.path.join() would use the platform separator
    # and would drop base_url if branch started with a '/'.
    url = '/'.join((base_url.rstrip('/'), 'ref', branch, 'sha1'))
    resp = requests.get(url)
    if not resp.ok:
        return None
    return str(resp.text.strip())
|
|
|
|
|
|
def get_distro_defaults(distro, machine_type):
    """
    Given a distro (e.g. 'ubuntu') and machine type, return:
        (arch, release, pkg_type)

    This is mainly used to default to:
        ('x86_64', 'precise', 'deb') when passed 'ubuntu' and 'plana'
    And ('armv7l', 'saucy', 'deb') when passed 'ubuntu' and 'saya'
    And ('x86_64', 'centos6', 'rpm') when passed anything non-ubuntu

    :returns: A 3-tuple of strings
    """
    if distro == 'ubuntu':
        pkg_type = 'deb'
        # 'saya' is the only machine type mapped to ARM here
        if machine_type == 'saya':
            arch, release = 'armv7l', 'saucy'
        else:
            arch, release = 'x86_64', 'precise'
    else:
        arch, release, pkg_type = 'x86_64', 'centos6', 'rpm'
    log.debug(
        "Defaults for machine_type %s: arch=%s, release=%s, pkg_type=%s",
        machine_type, arch, release, pkg_type)
    return (arch, release, pkg_type)
|
|
|
|
|
|
def get_gitbuilder_url(project, distro, pkg_type, arch, kernel_flavor):
    """
    Build the base URL of a gitbuilder repository, for example:
        http://gitbuilder.ceph.com/ceph-deb-squeeze-x86_64-basic/

    :param project:       'ceph' or 'kernel'
    :param distro:        A distro-ish string like 'trusty' or 'fedora20'
    :param pkg_type:      Probably 'rpm' or 'deb'
    :param arch:          A string like 'x86_64'
    :param kernel_flavor: A string like 'basic'
    :returns: The URL as a string, including the trailing slash
    """
    fields = {
        'proj': project,
        'pkg': pkg_type,
        'distro': distro,
        'arch': arch,
        'flav': kernel_flavor,
    }
    return (
        'http://gitbuilder.ceph.com/{proj}-{pkg}-{distro}-{arch}-{flav}/'
        .format(**fields)
    )
|
|
|
|
|
|
def package_version_for_hash(hash, kernel_flavor='basic',
                             distro='ubuntu', machine_type='plana'):
    """
    Look up the ceph package version built for a given hash, using the
    gitbuilder repos.

    :param hash:          The sha1 to look up
    :param kernel_flavor: Passed to get_gitbuilder_url() as the flavor
    :param distro:        Passed to get_distro_defaults()
    :param machine_type:  Passed to get_distro_defaults()
    :returns: The version as a string, or None when the response status is
              not OK.
    """
    (arch, release, pkg_type) = get_distro_defaults(distro, machine_type)
    base_url = get_gitbuilder_url('ceph', release, pkg_type, arch,
                                  kernel_flavor)
    # URLs always use '/'; os.path.join() would use the platform separator
    url = '/'.join((base_url.rstrip('/'), 'sha1', hash, 'version'))
    resp = requests.get(url)
    if not resp.ok:
        return None
    return resp.text.strip()
|
|
|
|
|
|
def get_branch_info(project, branch, project_owner='ceph'):
    """
    Ask the GitHub API about one branch of a project. On success the decoded
    JSON is returned, which looks like:
        {u'object': {u'sha': <a_sha_string>,
                     u'type': <string>,
                     u'url': <url_to_commit>},
         u'ref': u'refs/heads/<branch>',
         u'url': <url_to_branch>}

    When the response status is not OK, None is returned. We mainly use this
    to check if a branch exists.
    """
    url_templ = 'https://api.github.com/repos/{project_owner}/{project}/git/refs/heads/{branch}'  # noqa
    response = requests.get(
        url_templ.format(project_owner=project_owner, project=project,
                         branch=branch)
    )
    return response.json() if response.ok else None
|
|
|
|
|
|
def schedule_suite(name,
                   path,
                   base_yamls,
                   base_args,
                   arch,
                   machine_type,
                   limit=0,
                   dry_run=True,
                   ):
    """
    schedule one suite.

    Every job description generated by build_matrix(path) is either skipped
    (excluded arch, excluded os_type, or a non-ubuntu os_type on anything
    but 'vps' machines) or scheduled by running the command in base_args
    with the base yamls and the job's yaml fragments appended.

    :param name:         Suite name, used as prefix of each job description
    :param path:         Path of the suite on disk
    :param base_yamls:   Paths of yaml files added to every job
    :param base_args:    Command and arguments shared by every job
    :param arch:         Jobs whose 'exclude_arch' equals this are skipped
    :param machine_type: Only 'vps' may run jobs with a non-ubuntu os_type
    :param limit:        Stop after this many jobs; 0 means no limit
    :param dry_run:      Log the commands instead of executing them
    :returns: number of jobs scheduled (in dry-run mode: the number that
              would have been scheduled)
    """
    count = 0
    log.debug('Suite %s in %s' % (name, path))
    configs = [(combine_path(name, item[0]), item[1]) for item in
               build_matrix(path)]
    log.info('Suite %s in %s generated %d jobs' % (
        name, path, len(configs)))

    for description, fragment_paths in configs:
        if limit > 0 and count >= limit:
            log.info(
                'Stopped after {limit} jobs due to --limit={limit}'.format(
                    limit=limit))
            break

        # Read each fragment with a context manager so the handles are
        # closed promptly.
        fragments = []
        for fragment_path in fragment_paths:
            with open(fragment_path, 'r') as fragment:
                fragments.append(fragment.read())
        raw_yaml = '\n'.join(fragments)

        # NOTE(review): yaml.load() can construct arbitrary Python objects.
        # If suite fragments can ever come from an untrusted source, switch
        # to yaml.safe_load().
        # An empty document parses to None; treat it as an empty mapping.
        parsed_yaml = yaml.load(raw_yaml) or {}
        os_type = parsed_yaml.get('os_type')
        exclude_arch = parsed_yaml.get('exclude_arch')
        exclude_os_type = parsed_yaml.get('exclude_os_type')

        if exclude_arch and exclude_arch == arch:
            log.info('Skipping due to excluded_arch: %s facets %s',
                     exclude_arch, description)
            continue
        if exclude_os_type and exclude_os_type == os_type:
            log.info('Skipping due to excluded_os_type: %s facets %s',
                     exclude_os_type, description)
            continue
        # We should not run multiple tests (changing distros) unless the
        # machine is a VPS.
        # Re-imaging baremetal is not yet supported.
        if machine_type != 'vps' and os_type and os_type != 'ubuntu':
            log.info(
                'Skipping due to non-ubuntu on baremetal facets %s',
                description)
            continue

        log.info(
            'Scheduling %s', description
        )

        arg = copy.deepcopy(base_args)
        arg.extend([
            '--description', description,
            '--',
        ])
        arg.extend(base_yamls)
        arg.extend(fragment_paths)

        if dry_run:
            # Quote any individual args so that individual commands can be
            # copied and pasted in order to execute them individually.
            printable_args = []
            for item in arg:
                if ' ' in item:
                    printable_args.append("'%s'" % item)
                else:
                    printable_args.append(item)
            log.info('dry-run: %s' % ' '.join(printable_args))
        else:
            subprocess.check_call(
                args=arg,
            )
        count += 1
    # Report what was actually scheduled, not what was generated: skipped
    # jobs and jobs beyond the limit do not count.
    return count
|
|
|
|
|
|
def combine_path(left, right):
    """
    Join two path components, tolerating an empty right-hand side.

    os.path.join(a, b) doesn't like it when b is None, so when right is
    falsy left is returned unchanged.
    """
    if not right:
        return left
    return os.path.join(left, right)
|
|
|
|
|
|
def build_matrix(path):
    """
    Return a list of items described by path

    Input: a single path. Output: a list of (description, [file list])
    tuples.

    * A regular file ending in '.yaml' yields one item whose description is
      None and whose file list holds just that path.
    * A plain directory yields, recursively, the items of each of its
      entries, in sorted order.
    * A directory holding a magic '+' file yields a single item whose
      description lists the directory's entries and whose file list takes
      the first file of every item found beneath it.
    * A directory holding a magic '%' file yields one item per combination
      (cartesian product) of the items of its entries.
    * Anything else yields an empty list.

    The final description (after recursion) for each item will look like a
    relative path. Where a '+' or '%' directory was involved, that path
    component is a brace-enclosed, space-separated list.
    """
    def prefixed(entry, items):
        # Put the entry name in front of each item's description; a
        # description of None (or '') becomes just the entry name.
        return [
            (os.path.join(entry, desc) if desc else entry, paths)
            for desc, paths in items
        ]

    if os.path.isfile(path) and path.endswith('.yaml'):
        return [(None, [path])]
    if not os.path.isdir(path):
        return []

    entries = sorted(os.listdir(path))

    if '+' in entries:
        # concatenate items
        entries.remove('+')
        collected = []
        for entry in entries:
            collected += build_matrix(os.path.join(path, entry))
        description = '{' + ' '.join(entries) + '}'
        return [(description, [item[1][0] for item in collected])]

    if '%' in entries:
        # convolve items
        entries.remove('%')
        axes = [
            prefixed(entry, build_matrix(os.path.join(path, entry)))
            for entry in entries
        ]
        # product() of no iterables would yield one empty combination
        if not axes:
            return []
        combos = []
        for combo in itertools.product(*axes):
            description = '{' + ' '.join(desc for desc, _ in combo) + '}'
            merged = list(
                itertools.chain.from_iterable(paths for _, paths in combo))
            combos.append((description, merged))
        return combos

    # list items
    listed = []
    for entry in entries:
        listed.extend(
            prefixed(entry, build_matrix(os.path.join(path, entry))))
    return listed
|
|
|
|
|
|
def get_arch(machine_type):
    """
    Look up the architecture of a machine_type by listing the machines known
    to the lock server (expensive!) and taking the 'arch' of the first one
    whose 'type' matches.

    :returns: A string or None
    """
    for machine in lock.list_locks():
        if machine['type'] != machine_type:
            continue
        return machine['arch']
    return None
|
|
|
|
# yaml template for the config that becomes the base for each generated job
# config. The {placeholders} are filled in by create_initial_config();
# {kernel_stanza} is either a complete top-level 'kernel:' mapping or an
# empty string.
# NOTE(review): the nesting below was reconstructed from the key structure
# (e.g. 'conf' -> 'mon' -> 'debug ...') -- confirm against the upstream file.
config_template = """
teuthology_branch: {teuthology_branch}
{kernel_stanza}
nuke-on-error: true
machine_type: {machine_type}
os_type: {distro}
branch: {ceph_branch}
suite: {nice_suite}
tasks:
- chef:
- clock.check:
overrides:
  workunit:
    sha1: {ceph_hash}
  s3tests:
    branch: {s3_branch}
  install:
    ceph:
      sha1: {ceph_hash}
  ceph:
    sha1: {ceph_hash}
    conf:
      mon:
        debug ms: 1
        debug mon: 20
        debug paxos: 20
      osd:
        debug ms: 1
        debug osd: 20
        debug filestore: 20
        debug journal: 20
    log-whitelist:
    - slow request
  ceph-deploy:
    branch:
      dev: {ceph_branch}
    conf:
      mon:
        osd default pool size: 2
        debug mon: 1
        debug paxos: 20
        debug ms: 20
      client:
        log file: /var/log/ceph/ceph-$name.$pid.log
  admin_socket:
    branch: {ceph_branch}
""".strip()
|