mirror of
https://github.com/ceph/ceph
synced 2024-12-18 17:37:38 +00:00
Merge pull request #54 from ceph/sentry
Add logs and Sentry events to suite emails
This commit is contained in:
commit
a472f5017a
35
teuthology/config.py
Normal file
35
teuthology/config.py
Normal file
@ -0,0 +1,35 @@
|
||||
import os
|
||||
import yaml
|
||||
import logging
|
||||
|
||||
CONF_FILE = os.path.join(os.environ['HOME'], '.teuthology.yaml')
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class _Config(object):
|
||||
"""
|
||||
This class is intended to unify teuthology's many configuration files and
|
||||
objects. Currently it serves as a convenient interface to
|
||||
~/.teuthology.yaml and nothing else.
|
||||
"""
|
||||
def __init__(self):
|
||||
if os.path.exists(CONF_FILE):
|
||||
self.__conf = yaml.safe_load(file(CONF_FILE))
|
||||
else:
|
||||
log.debug("%s not found", CONF_FILE)
|
||||
self.__conf = {}
|
||||
|
||||
# This property declaration exists mainly as an example; it is not
|
||||
# necessary unless you want to, say, define a set method and/or a
|
||||
# docstring.
|
||||
@property
|
||||
def lock_server(self):
|
||||
return self.__conf.get('lock_server')
|
||||
|
||||
# This takes care of any and all of the rest.
|
||||
# If the parameter is defined, return it. Otherwise return None.
|
||||
def __getattr__(self, name):
|
||||
return self.__conf.get(name)
|
||||
|
||||
config = _Config()
|
@ -1,9 +1,11 @@
|
||||
import sys
|
||||
import logging
|
||||
from teuthology.sentry import get_client as get_sentry_client
|
||||
from .config import config as teuth_config
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_one_task(taskname, **kwargs):
|
||||
submod = taskname
|
||||
subtask = 'task'
|
||||
@ -14,6 +16,7 @@ def run_one_task(taskname, **kwargs):
|
||||
fn = getattr(mod, subtask)
|
||||
return fn(**kwargs)
|
||||
|
||||
|
||||
def run_tasks(tasks, ctx):
|
||||
stack = []
|
||||
try:
|
||||
@ -31,12 +34,20 @@ def run_tasks(tasks, ctx):
|
||||
ctx.summary['success'] = False
|
||||
if 'failure_reason' not in ctx.summary:
|
||||
ctx.summary['failure_reason'] = str(e)
|
||||
msg = 'Saw exception from tasks.'
|
||||
sentry = get_sentry_client(ctx)
|
||||
log.exception('Saw exception from tasks.')
|
||||
sentry = get_sentry_client()
|
||||
if sentry:
|
||||
exc_id = sentry.captureException()
|
||||
msg += " Sentry id %s" % exc_id
|
||||
log.exception(msg)
|
||||
tags = {
|
||||
'task': taskname,
|
||||
'owner': ctx.owner,
|
||||
}
|
||||
exc_id = sentry.get_ident(sentry.captureException(tags=tags))
|
||||
event_url = "{server}/search?q={id}".format(
|
||||
server=teuth_config.sentry_server.strip('/'), id=exc_id)
|
||||
log.exception(" Sentry event: %s" % event_url)
|
||||
sentry_url_list = ctx.summary.get('sentry_events', [])
|
||||
sentry_url_list.append(event_url)
|
||||
ctx.summary['sentry_events'] = sentry_url_list
|
||||
if ctx.config.get('interactive-on-error'):
|
||||
from .task import interactive
|
||||
log.warning('Saw failure, going into interactive mode...')
|
||||
|
@ -1,13 +1,18 @@
|
||||
import logging
|
||||
from raven import Client
|
||||
from .config import config
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
client = None
|
||||
|
||||
def get_client(ctx):
|
||||
|
||||
def get_client():
|
||||
global client
|
||||
if client:
|
||||
return client
|
||||
dsn = ctx.teuthology_config.get('sentry_dsn')
|
||||
|
||||
dsn = config.sentry_dsn
|
||||
if dsn:
|
||||
client = Client(dsn=dsn)
|
||||
return client
|
||||
|
||||
|
@ -4,6 +4,8 @@ import errno
|
||||
import itertools
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from textwrap import dedent, fill
|
||||
|
||||
# this file is responsible for submitting tests into the queue
|
||||
# by generating combinations of facets found in
|
||||
@ -17,6 +19,7 @@ import yaml
|
||||
from teuthology import misc as teuthology
|
||||
from teuthology import safepath
|
||||
from teuthology import lock as lock
|
||||
from teuthology.config import config
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@ -202,11 +205,8 @@ def ls():
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
for j in sorted(os.listdir(args.archive_dir)):
|
||||
for j in get_jobs(args.archive_dir):
|
||||
job_dir = os.path.join(args.archive_dir, j)
|
||||
if j.startswith('.') or not os.path.isdir(job_dir):
|
||||
continue
|
||||
|
||||
summary = {}
|
||||
try:
|
||||
with file(os.path.join(job_dir, 'summary.yaml')) as f:
|
||||
@ -337,13 +337,14 @@ def results():
|
||||
log.exception('error generating results')
|
||||
raise
|
||||
|
||||
|
||||
def _results(args):
|
||||
running_tests = [
|
||||
f for f in sorted(os.listdir(args.archive_dir))
|
||||
if not f.startswith('.')
|
||||
and os.path.isdir(os.path.join(args.archive_dir, f))
|
||||
and not os.path.exists(os.path.join(args.archive_dir, f, 'summary.yaml'))
|
||||
]
|
||||
]
|
||||
starttime = time.time()
|
||||
log.info('Waiting up to %d seconds for tests to finish...', args.timeout)
|
||||
while running_tests and args.timeout > 0:
|
||||
@ -359,67 +360,8 @@ def _results(args):
|
||||
time.sleep(10)
|
||||
log.info('Tests finished! gathering results...')
|
||||
|
||||
descriptions = []
|
||||
failures = []
|
||||
num_failures = 0
|
||||
unfinished = []
|
||||
passed = []
|
||||
all_jobs = sorted(os.listdir(args.archive_dir))
|
||||
for j in all_jobs:
|
||||
job_dir = os.path.join(args.archive_dir, j)
|
||||
if j.startswith('.') or not os.path.isdir(job_dir):
|
||||
continue
|
||||
summary_fn = os.path.join(job_dir, 'summary.yaml')
|
||||
if not os.path.exists(summary_fn):
|
||||
unfinished.append(j)
|
||||
continue
|
||||
summary = {}
|
||||
with file(summary_fn) as f:
|
||||
g = yaml.safe_load_all(f)
|
||||
for new in g:
|
||||
summary.update(new)
|
||||
desc = '{test}: ({duration}s) {desc}'.format(
|
||||
duration=int(summary.get('duration', 0)),
|
||||
desc=summary['description'],
|
||||
test=j,
|
||||
)
|
||||
descriptions.append(desc)
|
||||
if summary['success']:
|
||||
passed.append(desc)
|
||||
else:
|
||||
failures.append(desc)
|
||||
num_failures += 1
|
||||
if 'failure_reason' in summary:
|
||||
failures.append(' {reason}'.format(
|
||||
reason=summary['failure_reason'],
|
||||
))
|
||||
|
||||
if failures or unfinished:
|
||||
subject = ('{num_failed} failed, {num_hung} hung, '
|
||||
'{num_passed} passed in {suite}'.format(
|
||||
num_failed=num_failures,
|
||||
num_hung=len(unfinished),
|
||||
num_passed=len(passed),
|
||||
suite=args.name,
|
||||
))
|
||||
body = """
|
||||
The following tests failed:
|
||||
|
||||
{failures}
|
||||
|
||||
These tests may be hung (did not finish in {timeout} seconds after the last test in the suite):
|
||||
{unfinished}
|
||||
|
||||
These tests passed:
|
||||
{passed}""".format(
|
||||
failures='\n'.join(failures),
|
||||
unfinished='\n'.join(unfinished),
|
||||
passed='\n'.join(passed),
|
||||
timeout=args.timeout,
|
||||
)
|
||||
else:
|
||||
subject = '{num_passed} passed in {suite}'.format(suite=args.name, num_passed=len(passed))
|
||||
body = '\n'.join(descriptions)
|
||||
(subject, body) = build_email_body(args.name, args.archive_dir,
|
||||
args.timeout)
|
||||
|
||||
try:
|
||||
if args.email:
|
||||
@ -428,10 +370,167 @@ These tests passed:
|
||||
from_=args.teuthology_config['results_sending_email'],
|
||||
to=args.email,
|
||||
body=body,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
generate_coverage(args)
|
||||
|
||||
|
||||
def get_http_log_path(archive_dir, job_id):
    """Return the HTTP location of a job's logs, or None if unavailable.

    :param archive_dir: path of the run's archive directory; only its last
                        component is used in the result
    :param job_id:      job identifier appended to the location; pass ''
                        to get the location of the run itself
    :returns:           config.archive_server joined with the run directory
                        name and str(job_id), or None when archive_server is
                        not configured
    """
    http_base = config.archive_server
    if not http_base:
        return None
    # normpath drops a trailing separator; without it '/a/run/' would yield
    # an empty last component and the run name would vanish from the result.
    archive_subdir = os.path.basename(os.path.normpath(archive_dir))
    return os.path.join(http_base, archive_subdir, str(job_id))
|
||||
|
||||
|
||||
def get_jobs(archive_dir):
    """Return the sorted names of the job directories inside archive_dir.

    An entry counts as a job when it is a directory and its name matches
    the pattern ``\\d+$``. Names are sorted as strings, not as numbers.

    :param archive_dir: directory to scan
    :returns:           list of matching entry names
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    job_name = re.compile(r'\d+$')
    return sorted(
        entry for entry in os.listdir(archive_dir)
        if job_name.match(entry)
        and os.path.isdir(os.path.join(archive_dir, entry))
    )
|
||||
|
||||
|
||||
email_templates = {
|
||||
'body_templ': dedent("""\
|
||||
Test Run
|
||||
NOTE: Apologies for links inside the Inktank firewall; we are working to make them public.
|
||||
=================================================================
|
||||
logs: {log_root}
|
||||
failed: {fail_count}
|
||||
hung: {hung_count}
|
||||
passed: {pass_count}
|
||||
|
||||
{fail_sect}{hung_sect}{pass_sect}
|
||||
"""),
|
||||
'sect_templ': dedent("""\
|
||||
{title}
|
||||
=================================================================
|
||||
{jobs}
|
||||
"""),
|
||||
'fail_templ': dedent("""\
|
||||
[{job_id}] {desc}
|
||||
-----------------------------------------------------------------
|
||||
time: {time}s{log_line}{sentry_line}
|
||||
|
||||
{reason}
|
||||
|
||||
"""),
|
||||
'fail_log_templ': "\nlog: {log}",
|
||||
'fail_sentry_templ': "\nsentry: {sentries}",
|
||||
'hung_templ': dedent("""\
|
||||
[{job_id}]
|
||||
"""),
|
||||
'pass_templ': dedent("""\
|
||||
[{job_id}] {desc}
|
||||
time: {time}s
|
||||
|
||||
"""),
|
||||
}
|
||||
|
||||
|
||||
|
||||
def build_email_body(name, archive_dir, timeout):
    """Build the subject and body of the results email for a suite run.

    Each job directory found by get_jobs() is classified as hung (no
    summary.yaml), passed or failed, and rendered with the matching entry of
    email_templates.

    :param name:        suite name, placed at the end of the subject line
    :param archive_dir: directory holding one sub-directory per job
    :param timeout:     not used by this function; kept so that existing
                        callers keep working
    :returns:           a (subject, body) tuple, both stripped of
                        surrounding whitespace
    """
    # Lists rather than dicts: entries stay in the sorted order that
    # get_jobs() returns, so the email lists jobs deterministically.
    failed = []
    hung = []
    passed = []

    for job in get_jobs(archive_dir):
        summary_file = os.path.join(archive_dir, job, 'summary.yaml')

        # Unfinished jobs will have no summary.yaml
        if not os.path.exists(summary_file):
            hung.append(email_templates['hung_templ'].format(job_id=job))
            continue

        with open(summary_file) as f:
            # An empty summary parses to None; treat it as an empty mapping.
            summary = yaml.safe_load(f) or {}

        desc = summary.get('description')
        # A summary without a duration must not abort the whole report.
        duration = int(summary.get('duration') or 0)

        if summary.get('success'):
            passed.append(email_templates['pass_templ'].format(
                job_id=job,
                desc=desc,
                time=duration,
            ))
            continue

        # Named log_url so the module-level logger 'log' is not shadowed.
        log_url = get_http_log_path(archive_dir, job)
        log_line = ''
        if log_url:
            log_line = email_templates['fail_log_templ'].format(log=log_url)

        sentry_events = summary.get('sentry_events')
        sentry_line = ''
        if sentry_events:
            sentry_line = email_templates['fail_sentry_templ'].format(
                sentries='\n '.join(sentry_events))

        # 'fill' is from the textwrap module and it collapses a given
        # string into multiple lines of a maximum width as specified. We
        # want 75 characters here so that when we indent by 4 on the next
        # line, we have 79-character exception paragraphs.
        # A missing failure_reason becomes an empty paragraph.
        reason = fill(summary.get('failure_reason') or '', 75)
        reason = '\n'.join('    ' + line for line in reason.splitlines())

        failed.append(email_templates['fail_templ'].format(
            job_id=job,
            desc=desc,
            time=duration,
            reason=reason,
            log_line=log_line,
            sentry_line=sentry_line,
        ))

    def separator(more_follows):
        # A comma only when another count follows in the subject line.
        return ', ' if more_follows else ' '

    subject = ''
    fail_sect = ''
    hung_sect = ''
    pass_sect = ''
    if failed:
        subject += '{num_failed} failed{sep}'.format(
            num_failed=len(failed),
            sep=separator(hung or passed),
        )
        fail_sect = email_templates['sect_templ'].format(
            title='Failed',
            jobs=''.join(failed),
        )
    if hung:
        subject += '{num_hung} hung{sep}'.format(
            num_hung=len(hung),
            sep=separator(passed),
        )
        hung_sect = email_templates['sect_templ'].format(
            title='Hung',
            jobs=''.join(hung),
        )
    if passed:
        subject += '%s passed ' % len(passed)
        pass_sect = email_templates['sect_templ'].format(
            title='Passed',
            jobs=''.join(passed),
        )

    body = email_templates['body_templ'].format(
        log_root=get_http_log_path(archive_dir, ''),
        fail_count=len(failed),
        hung_count=len(hung),
        pass_count=len(passed),
        fail_sect=fail_sect,
        hung_sect=hung_sect,
        pass_sect=pass_sect,
    )

    subject += 'in {suite}'.format(suite=name)
    return (subject.strip(), body.strip())
|
||||
|
||||
|
||||
def get_arch(config):
|
||||
for yamlfile in config:
|
||||
y = yaml.safe_load(file(yamlfile))
|
||||
@ -445,6 +544,7 @@ def get_arch(config):
|
||||
return arch
|
||||
return None
|
||||
|
||||
|
||||
def get_os_type(configs):
|
||||
for config in configs:
|
||||
yamlfile = config[2]
|
||||
@ -456,6 +556,7 @@ def get_os_type(configs):
|
||||
return os_type
|
||||
return None
|
||||
|
||||
|
||||
def get_exclude_arch(configs):
|
||||
for config in configs:
|
||||
yamlfile = config[2]
|
||||
@ -467,6 +568,7 @@ def get_exclude_arch(configs):
|
||||
return exclude_arch
|
||||
return None
|
||||
|
||||
|
||||
def get_exclude_os_type(configs):
|
||||
for config in configs:
|
||||
yamlfile = config[2]
|
||||
@ -478,6 +580,7 @@ def get_exclude_os_type(configs):
|
||||
return exclude_os_type
|
||||
return None
|
||||
|
||||
|
||||
def get_machine_type(config):
|
||||
for yamlfile in config:
|
||||
y = yaml.safe_load(file(yamlfile))
|
||||
|
Loading…
Reference in New Issue
Block a user