mirror of
https://github.com/ceph/ceph
synced 2025-01-29 14:34:40 +00:00
cd3a61093d
One creates the ctx.cluster object; the other initiates connections Signed-off-by: Zack Cerza <zack@redhat.com>
349 lines
10 KiB
Python
349 lines
10 KiB
Python
import os
|
|
import yaml
|
|
import StringIO
|
|
import contextlib
|
|
import sys
|
|
import logging
|
|
from traceback import format_tb
|
|
|
|
import teuthology
|
|
from . import report
|
|
from .job_status import get_status
|
|
from .misc import get_user, merge_configs
|
|
from .nuke import nuke
|
|
from .run_tasks import run_tasks
|
|
from .repo_utils import fetch_qa_suite
|
|
from .results import email_results
|
|
from .config import FakeNamespace
|
|
from .config import config as teuth_config
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def set_up_logging(verbose, archive):
|
|
if verbose:
|
|
teuthology.log.setLevel(logging.DEBUG)
|
|
|
|
if archive is not None:
|
|
os.mkdir(archive)
|
|
|
|
teuthology.setup_log_file(os.path.join(archive, 'teuthology.log'))
|
|
|
|
install_except_hook()
|
|
|
|
|
|
def install_except_hook():
|
|
def log_exception(exception_class, exception, traceback):
|
|
logging.critical(''.join(format_tb(traceback)))
|
|
if not exception.message:
|
|
logging.critical(exception_class.__name__)
|
|
return
|
|
logging.critical('{0}: {1}'.format(
|
|
exception_class.__name__, exception))
|
|
|
|
sys.excepthook = log_exception
|
|
|
|
|
|
def write_initial_metadata(archive, config, name, description, owner):
|
|
if archive is not None:
|
|
with file(os.path.join(archive, 'pid'), 'w') as f:
|
|
f.write('%d' % os.getpid())
|
|
|
|
with file(os.path.join(archive, 'owner'), 'w') as f:
|
|
f.write(owner + '\n')
|
|
|
|
with file(os.path.join(archive, 'orig.config.yaml'), 'w') as f:
|
|
yaml.safe_dump(config, f, default_flow_style=False)
|
|
|
|
info = {
|
|
'name': name,
|
|
'description': description,
|
|
'owner': owner,
|
|
'pid': os.getpid(),
|
|
}
|
|
if 'job_id' in config:
|
|
info['job_id'] = config['job_id']
|
|
|
|
with file(os.path.join(archive, 'info.yaml'), 'w') as f:
|
|
yaml.safe_dump(info, f, default_flow_style=False)
|
|
|
|
|
|
def fetch_tasks_if_needed(job_config):
|
|
"""
|
|
Fetch the suite repo (and include it in sys.path) so that we can use its
|
|
tasks.
|
|
|
|
Returns the suite_path. The existing suite_path will be returned if the
|
|
tasks can be imported, if not a new suite_path will try to be determined.
|
|
"""
|
|
# Any scheduled job will already have the suite checked out and its
|
|
# $PYTHONPATH set. We can check for this by looking for 'suite_path'
|
|
# in its config.
|
|
suite_path = job_config.get('suite_path')
|
|
if suite_path:
|
|
log.info("suite_path is set to %s; will attempt to use it", suite_path)
|
|
if suite_path not in sys.path:
|
|
sys.path.insert(1, suite_path)
|
|
|
|
try:
|
|
import tasks
|
|
log.info("Found tasks at %s", os.path.dirname(tasks.__file__))
|
|
# tasks found with the existing suite branch, return it
|
|
return suite_path
|
|
except ImportError:
|
|
log.info("Tasks not found; will attempt to fetch")
|
|
|
|
ceph_branch = job_config.get('branch', 'master')
|
|
suite_branch = job_config.get('suite_branch', ceph_branch)
|
|
suite_path = fetch_qa_suite(suite_branch)
|
|
sys.path.insert(1, suite_path)
|
|
return suite_path
|
|
|
|
|
|
def setup_config(config_paths):
|
|
"""
|
|
Takes a list of config yaml files and combines them
|
|
into a single dictionary. Processes / validates the dictionary and then
|
|
returns it.
|
|
"""
|
|
config = merge_configs(config_paths)
|
|
|
|
# Older versions of teuthology stored job_id as an int. Convert it to a str
|
|
# if necessary.
|
|
job_id = config.get('job_id')
|
|
if job_id is not None:
|
|
job_id = str(job_id)
|
|
config['job_id'] = job_id
|
|
|
|
# targets must be >= than roles
|
|
if 'targets' in config and 'roles' in config:
|
|
targets = len(config['targets'])
|
|
roles = len(config['roles'])
|
|
assert targets >= roles, \
|
|
'%d targets are needed for all roles but found %d listed.' % (
|
|
roles, targets)
|
|
|
|
return config
|
|
|
|
|
|
def get_machine_type(machine_type, config):
|
|
"""
|
|
If no machine_type is given, find the appropriate machine_type
|
|
from the given config.
|
|
"""
|
|
if machine_type is None:
|
|
fallback_default = config.get('machine_type',
|
|
teuth_config.default_machine_type)
|
|
machine_type = config.get('machine-type', fallback_default)
|
|
|
|
return machine_type
|
|
|
|
|
|
def get_summary(owner, description):
|
|
summary = dict(success=True)
|
|
summary['owner'] = owner
|
|
|
|
if description is not None:
|
|
summary['description'] = description
|
|
|
|
return summary
|
|
|
|
|
|
def validate_tasks(config):
|
|
"""
|
|
Ensures that config tasks do not include 'kernel'.
|
|
|
|
Returns the original tasks key if found. If not, returns an
|
|
empty list.
|
|
"""
|
|
if 'tasks' not in config:
|
|
log.warning('No tasks specified. Continuing anyway...')
|
|
# return the default value for tasks
|
|
return []
|
|
|
|
for task in config['tasks']:
|
|
msg = ('kernel installation shouldn be a base-level item, not part ' +
|
|
'of the tasks list')
|
|
assert 'kernel' not in task, msg
|
|
|
|
return config["tasks"]
|
|
|
|
|
|
def get_initial_tasks(lock, config, machine_type):
|
|
init_tasks = [{'internal.check_packages': None}]
|
|
if lock:
|
|
msg = ('You cannot specify targets in a config file when using the ' +
|
|
'--lock option')
|
|
assert 'targets' not in config, msg
|
|
init_tasks.append({'internal.lock_machines': (
|
|
len(config['roles']), machine_type)})
|
|
|
|
init_tasks.extend([
|
|
{'internal.save_config': None},
|
|
{'internal.check_lock': None},
|
|
{'internal.add_remotes': None},
|
|
{'internal.connect': None},
|
|
{'internal.push_inventory': None},
|
|
{'internal.serialize_remote_roles': None},
|
|
{'internal.check_conflict': None},
|
|
])
|
|
|
|
if not config.get('use_existing_cluster', False):
|
|
init_tasks.extend([
|
|
{'internal.check_ceph_data': None},
|
|
{'internal.vm_setup': None},
|
|
])
|
|
|
|
if 'kernel' in config:
|
|
init_tasks.append({'kernel': config['kernel']})
|
|
|
|
init_tasks.extend([
|
|
{'internal.base': None},
|
|
{'internal.archive': None},
|
|
{'internal.coredump': None},
|
|
{'internal.sudo': None},
|
|
{'internal.syslog': None},
|
|
{'internal.timer': None},
|
|
])
|
|
|
|
return init_tasks
|
|
|
|
|
|
def report_outcome(config, archive, summary, fake_ctx):
|
|
""" Reports on the final outcome of the command. """
|
|
status = get_status(summary)
|
|
passed = status == 'pass'
|
|
|
|
if not passed and bool(config.get('nuke-on-error')):
|
|
# only unlock if we locked them in the first place
|
|
nuke(fake_ctx, fake_ctx.lock)
|
|
|
|
if archive is not None:
|
|
with file(os.path.join(archive, 'summary.yaml'), 'w') as f:
|
|
yaml.safe_dump(summary, f, default_flow_style=False)
|
|
|
|
with contextlib.closing(StringIO.StringIO()) as f:
|
|
yaml.safe_dump(summary, f)
|
|
log.info('Summary data:\n%s' % f.getvalue())
|
|
|
|
with contextlib.closing(StringIO.StringIO()) as f:
|
|
if ('email-on-error' in config
|
|
and not passed):
|
|
yaml.safe_dump(summary, f)
|
|
yaml.safe_dump(config, f)
|
|
emsg = f.getvalue()
|
|
subject = "Teuthology error -- %s" % summary[
|
|
'failure_reason']
|
|
email_results(subject, "Teuthology", config[
|
|
'email-on-error'], emsg)
|
|
|
|
report.try_push_job_info(config, summary)
|
|
|
|
if passed:
|
|
log.info(status)
|
|
else:
|
|
log.info(str(status).upper())
|
|
sys.exit(1)
|
|
|
|
|
|
def get_teuthology_command(args):
|
|
"""
|
|
Rebuilds the teuthology command used to run this job
|
|
and returns it as a string.
|
|
"""
|
|
cmd = ["teuthology"]
|
|
for key, value in args.iteritems():
|
|
if value:
|
|
# an option, not an argument
|
|
if not key.startswith("<"):
|
|
cmd.append(key)
|
|
else:
|
|
# this is the <config> argument
|
|
for arg in value:
|
|
cmd.append(str(arg))
|
|
continue
|
|
# so we don't print something like --verbose True
|
|
if isinstance(value, str):
|
|
cmd.append(value)
|
|
return " ".join(cmd)
|
|
|
|
|
|
def main(args):
|
|
verbose = args["--verbose"]
|
|
archive = args["--archive"]
|
|
owner = args["--owner"]
|
|
config = args["<config>"]
|
|
name = args["--name"]
|
|
description = args["--description"]
|
|
machine_type = args["--machine-type"]
|
|
block = args["--block"]
|
|
lock = args["--lock"]
|
|
suite_path = args["--suite-path"]
|
|
os_type = args["--os-type"]
|
|
os_version = args["--os-version"]
|
|
|
|
set_up_logging(verbose, archive)
|
|
|
|
# print the command being ran
|
|
log.debug("Teuthology command: {0}".format(get_teuthology_command(args)))
|
|
|
|
if owner is None:
|
|
args["--owner"] = owner = get_user()
|
|
|
|
config = setup_config(config)
|
|
|
|
write_initial_metadata(archive, config, name, description, owner)
|
|
report.try_push_job_info(config, dict(status='running'))
|
|
|
|
machine_type = get_machine_type(machine_type, config)
|
|
args["--machine-type"] = machine_type
|
|
|
|
if block:
|
|
assert lock, \
|
|
'the --block option is only supported with the --lock option'
|
|
|
|
log.debug(
|
|
'\n '.join(['Config:', ] + yaml.safe_dump(
|
|
config, default_flow_style=False).splitlines()))
|
|
|
|
args["summary"] = get_summary(owner, description)
|
|
|
|
config["tasks"] = validate_tasks(config)
|
|
|
|
init_tasks = get_initial_tasks(lock, config, machine_type)
|
|
|
|
# prepend init_tasks to the front of the task list
|
|
config['tasks'][:0] = init_tasks
|
|
|
|
if suite_path is not None:
|
|
config['suite_path'] = suite_path
|
|
|
|
# fetches the tasks and returns a new suite_path if needed
|
|
config["suite_path"] = fetch_tasks_if_needed(config)
|
|
|
|
# overwrite the config value of os_type if --os-type is provided
|
|
if os_type:
|
|
config["os_type"] = os_type
|
|
|
|
# overwrite the config value of os_version if --os-version is provided
|
|
if os_version:
|
|
config["os_version"] = os_version
|
|
|
|
# create a FakeNamespace instance that mimics the old argparse way of doing
|
|
# things we do this so we can pass it to run_tasks without porting those
|
|
# tasks to the new way of doing things right now
|
|
args["<config>"] = config
|
|
fake_ctx = FakeNamespace(args)
|
|
|
|
# store on global config if interactive-on-error, for contextutil.nested()
|
|
# FIXME this should become more generic, and the keys should use
|
|
# '_' uniformly
|
|
if fake_ctx.config.get('interactive-on-error'):
|
|
teuthology.config.config.ctx = fake_ctx
|
|
|
|
try:
|
|
run_tasks(tasks=config['tasks'], ctx=fake_ctx)
|
|
finally:
|
|
# print to stdout the results and possibly send an email on any errors
|
|
report_outcome(config, archive, fake_ctx.summary, fake_ctx)
|