Merge pull request #54 from ceph/sentry

Add logs and Sentry events to suite emails
This commit is contained in:
Alfredo Deza 2013-08-28 09:38:20 -07:00
commit a472f5017a
4 changed files with 229 additions and 75 deletions

35
teuthology/config.py Normal file
View File

@ -0,0 +1,35 @@
import os
import yaml
import logging
# Location of the per-user configuration file. expanduser() honours $HOME when
# it is set but, unlike os.environ['HOME'], does not raise KeyError at import
# time when the variable is missing (cron jobs, stripped-down daemons).
CONF_FILE = os.path.join(os.path.expanduser('~'), '.teuthology.yaml')
log = logging.getLogger(__name__)
class _Config(object):
    """
    This class is intended to unify teuthology's many configuration files and
    objects. Currently it serves as a convenient interface to
    ~/.teuthology.yaml and nothing else.

    Every top-level key of the YAML file can be read as an attribute of the
    instance; a key that is not present evaluates to None.
    """

    def __init__(self):
        conf = None
        if os.path.exists(CONF_FILE):
            # Use a context manager so the handle is closed deterministically
            # (the file() builtin also does not exist on Python 3).
            with open(CONF_FILE) as conf_file:
                conf = yaml.safe_load(conf_file)
        else:
            log.debug("%s not found", CONF_FILE)
        # safe_load() returns None for an empty file; normalise that to an
        # empty mapping so attribute lookups keep returning None.
        self.__conf = conf or {}

    # This property declaration exists mainly as an example; it is not
    # necessary unless you want to, say, define a set method and/or a
    # docstring.
    @property
    def lock_server(self):
        """Value of the ``lock_server`` setting, or None when it is unset."""
        return self.__conf.get('lock_server')

    # This takes care of any and all of the rest.
    # If the parameter is defined, return it. Otherwise return None.
    def __getattr__(self, name):
        # __getattr__ only runs after normal lookup has failed. Special
        # method probes (copy, pickle, hasattr checks, ...) must see a real
        # AttributeError rather than None.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        # Read the mapping through __dict__: if the name-mangled attribute
        # is missing (instance created without __init__), ``self.__conf``
        # would re-enter __getattr__ and recurse without bound.
        return self.__dict__.get('_Config__conf', {}).get(name)
# Module-level instance, created once at import time; the other modules in
# this change import this object (``from .config import config``).
config = _Config()

View File

@ -1,9 +1,11 @@
import sys
import logging
from teuthology.sentry import get_client as get_sentry_client
from .config import config as teuth_config
log = logging.getLogger(__name__)
def run_one_task(taskname, **kwargs):
submod = taskname
subtask = 'task'
@ -14,6 +16,7 @@ def run_one_task(taskname, **kwargs):
fn = getattr(mod, subtask)
return fn(**kwargs)
def run_tasks(tasks, ctx):
stack = []
try:
@ -31,12 +34,20 @@ def run_tasks(tasks, ctx):
ctx.summary['success'] = False
if 'failure_reason' not in ctx.summary:
ctx.summary['failure_reason'] = str(e)
msg = 'Saw exception from tasks.'
sentry = get_sentry_client(ctx)
log.exception('Saw exception from tasks.')
sentry = get_sentry_client()
if sentry:
exc_id = sentry.captureException()
msg += " Sentry id %s" % exc_id
log.exception(msg)
tags = {
'task': taskname,
'owner': ctx.owner,
}
exc_id = sentry.get_ident(sentry.captureException(tags=tags))
event_url = "{server}/search?q={id}".format(
server=teuth_config.sentry_server.strip('/'), id=exc_id)
log.exception(" Sentry event: %s" % event_url)
sentry_url_list = ctx.summary.get('sentry_events', [])
sentry_url_list.append(event_url)
ctx.summary['sentry_events'] = sentry_url_list
if ctx.config.get('interactive-on-error'):
from .task import interactive
log.warning('Saw failure, going into interactive mode...')

View File

@ -1,13 +1,18 @@
import logging
from raven import Client
from .config import config
log = logging.getLogger(__name__)
client = None
def get_client(ctx):
def get_client():
global client
if client:
return client
dsn = ctx.teuthology_config.get('sentry_dsn')
dsn = config.sentry_dsn
if dsn:
client = Client(dsn=dsn)
return client

View File

@ -4,6 +4,8 @@ import errno
import itertools
import logging
import os
import re
from textwrap import dedent, fill
# this file is responsible for submitting tests into the queue
# by generating combinations of facets found in
@ -17,6 +19,7 @@ import yaml
from teuthology import misc as teuthology
from teuthology import safepath
from teuthology import lock as lock
from teuthology.config import config
log = logging.getLogger(__name__)
@ -202,11 +205,8 @@ def ls():
)
args = parser.parse_args()
for j in sorted(os.listdir(args.archive_dir)):
for j in get_jobs(args.archive_dir):
job_dir = os.path.join(args.archive_dir, j)
if j.startswith('.') or not os.path.isdir(job_dir):
continue
summary = {}
try:
with file(os.path.join(job_dir, 'summary.yaml')) as f:
@ -337,13 +337,14 @@ def results():
log.exception('error generating results')
raise
def _results(args):
running_tests = [
f for f in sorted(os.listdir(args.archive_dir))
if not f.startswith('.')
and os.path.isdir(os.path.join(args.archive_dir, f))
and not os.path.exists(os.path.join(args.archive_dir, f, 'summary.yaml'))
]
]
starttime = time.time()
log.info('Waiting up to %d seconds for tests to finish...', args.timeout)
while running_tests and args.timeout > 0:
@ -359,67 +360,8 @@ def _results(args):
time.sleep(10)
log.info('Tests finished! gathering results...')
descriptions = []
failures = []
num_failures = 0
unfinished = []
passed = []
all_jobs = sorted(os.listdir(args.archive_dir))
for j in all_jobs:
job_dir = os.path.join(args.archive_dir, j)
if j.startswith('.') or not os.path.isdir(job_dir):
continue
summary_fn = os.path.join(job_dir, 'summary.yaml')
if not os.path.exists(summary_fn):
unfinished.append(j)
continue
summary = {}
with file(summary_fn) as f:
g = yaml.safe_load_all(f)
for new in g:
summary.update(new)
desc = '{test}: ({duration}s) {desc}'.format(
duration=int(summary.get('duration', 0)),
desc=summary['description'],
test=j,
)
descriptions.append(desc)
if summary['success']:
passed.append(desc)
else:
failures.append(desc)
num_failures += 1
if 'failure_reason' in summary:
failures.append(' {reason}'.format(
reason=summary['failure_reason'],
))
if failures or unfinished:
subject = ('{num_failed} failed, {num_hung} hung, '
'{num_passed} passed in {suite}'.format(
num_failed=num_failures,
num_hung=len(unfinished),
num_passed=len(passed),
suite=args.name,
))
body = """
The following tests failed:
{failures}
These tests may be hung (did not finish in {timeout} seconds after the last test in the suite):
{unfinished}
These tests passed:
{passed}""".format(
failures='\n'.join(failures),
unfinished='\n'.join(unfinished),
passed='\n'.join(passed),
timeout=args.timeout,
)
else:
subject = '{num_passed} passed in {suite}'.format(suite=args.name, num_passed=len(passed))
body = '\n'.join(descriptions)
(subject, body) = build_email_body(args.name, args.archive_dir,
args.timeout)
try:
if args.email:
@ -428,10 +370,167 @@ These tests passed:
from_=args.teuthology_config['results_sending_email'],
to=args.email,
body=body,
)
)
finally:
generate_coverage(args)
def get_http_log_path(archive_dir, job_id):
    """
    Build the HTTP location of a job's logs on the archive server.

    :param archive_dir: Local path of the run's archive directory; only its
                        final path component is used.
    :param job_id:      Job identifier, converted with str(). Passing ''
                        yields the location of the run directory itself.
    :returns:           ``archive_server`` joined with the run directory name
                        and the job id, or None when ``archive_server`` is
                        not set in the configuration.
    """
    http_base = config.archive_server
    if not http_base:
        return None
    # Drop trailing separators first: os.path.split('/a/run/') ends in an
    # empty component, which would silently lose the run name from the URL.
    archive_subdir = os.path.basename(archive_dir.rstrip(os.sep))
    return os.path.join(http_base, archive_subdir, str(job_id))
def get_jobs(archive_dir):
    """
    List the job directories inside a run's archive directory.

    An entry counts as a job when it is a directory whose name consists only
    of digits.

    :param archive_dir: Path of the run's archive directory.
    :returns:           List of job directory names (strings) in ascending
                        numeric order.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    job_name = re.compile(r'\d+$')

    def is_job_dir(parent, subdir):
        return (job_name.match(subdir) is not None
                and os.path.isdir(os.path.join(parent, subdir)))

    jobs = [entry for entry in os.listdir(archive_dir)
            if is_job_dir(archive_dir, entry)]
    # Sort numerically so that job 9 is listed before job 10; the name itself
    # is the tie-breaker to keep the result deterministic.
    return sorted(jobs, key=lambda job: (int(job), job))
# str.format() templates used by build_email_body() to render the results
# email. Whitespace inside the strings is part of the output.
email_templates = {
# Whole message: header, per-category counts, then the three (possibly
# empty) sections rendered with 'sect_templ'.
'body_templ': dedent("""\
Test Run
NOTE: Apologies for links inside the Inktank firewall; we are working to make them public.
=================================================================
logs: {log_root}
failed: {fail_count}
hung: {hung_count}
passed: {pass_count}
{fail_sect}{hung_sect}{pass_sect}
"""),
# One section: a title, a rule, and the already-rendered job entries.
'sect_templ': dedent("""\
{title}
=================================================================
{jobs}
"""),
# One failed job. {log_line} and {sentry_line} are either '' or one of the
# two optional fragments below.
'fail_templ': dedent("""\
[{job_id}] {desc}
-----------------------------------------------------------------
time: {time}s{log_line}{sentry_line}
{reason}
"""),
# Optional fragments for 'fail_templ'; each starts with its own newline so
# that omitting it leaves no blank line behind.
'fail_log_templ': "\nlog: {log}",
'fail_sentry_templ': "\nsentry: {sentries}",
# One job that has no summary.yaml.
'hung_templ': dedent("""\
[{job_id}]
"""),
# One passed job.
'pass_templ': dedent("""\
[{job_id}] {desc}
time: {time}s
"""),
}
def build_email_body(name, archive_dir, timeout):
    """
    Build the subject and body of the results email for one suite run.

    Each job directory found by get_jobs() is classified as hung (it has no
    summary.yaml), passed or failed, and rendered with ``email_templates``.

    :param name:        Suite name, appended to the subject line.
    :param archive_dir: Path of the run's archive directory.
    :param timeout:     Accepted for the caller's sake; not used by this
                        function.
    :returns:           ``(subject, body)`` tuple, both stripped of
                        surrounding whitespace.
    """
    # Lists rather than dicts: only the count and the rendered text are
    # needed, and a list keeps the entries in the order get_jobs() returned
    # them on every Python version.
    failed = []
    hung = []
    passed = []

    for job in get_jobs(archive_dir):
        summary_file = os.path.join(archive_dir, job, 'summary.yaml')

        # Unfinished jobs will have no summary.yaml
        if not os.path.exists(summary_file):
            hung.append(email_templates['hung_templ'].format(job_id=job))
            continue

        # Merge every YAML document in the file. This tolerates an empty
        # summary (no documents) and multi-document summaries, both of which
        # a single safe_load() call would mishandle.
        summary = {}
        with open(summary_file) as f:
            for doc in yaml.safe_load_all(f):
                if isinstance(doc, dict):
                    summary.update(doc)

        description = summary.get('description')
        # A summary without a duration must not abort the whole report.
        duration = int(summary.get('duration') or 0)

        # A summary lacking 'success' is reported as a failure.
        if summary.get('success'):
            passed.append(email_templates['pass_templ'].format(
                job_id=job,
                desc=description,
                time=duration,
            ))
            continue

        # Named log_url so the module-level logger ``log`` is not shadowed.
        log_url = get_http_log_path(archive_dir, job)
        if log_url:
            log_line = email_templates['fail_log_templ'].format(log=log_url)
        else:
            log_line = ''

        sentry_events = summary.get('sentry_events')
        if sentry_events:
            sentry_line = email_templates['fail_sentry_templ'].format(
                sentries='\n '.join(sentry_events))
        else:
            sentry_line = ''

        # 'fill' is from the textwrap module and it collapses a given
        # string into multiple lines of a maximum width as specified. We
        # want 75 characters here so that when we indent by 4 on the next
        # line, we have 79-character exception paragraphs.
        reason = fill(summary.get('failure_reason') or '', 75)
        reason = '\n'.join('    ' + line for line in reason.splitlines())

        failed.append(email_templates['fail_templ'].format(
            job_id=job,
            desc=description,
            time=duration,
            reason=reason,
            log_line=log_line,
            sentry_line=sentry_line,
        ))

    def render_section(title, entries):
        # An empty category contributes nothing to the body.
        if not entries:
            return ''
        return email_templates['sect_templ'].format(
            title=title,
            jobs=''.join(entries),
        )

    # Subject looks like "2 failed, 1 hung, 7 passed in <suite>"; categories
    # with no jobs are left out.
    counts = []
    if failed:
        counts.append('{num_failed} failed'.format(num_failed=len(failed)))
    if hung:
        counts.append('{num_hung} hung'.format(num_hung=len(hung)))
    if passed:
        counts.append('%s passed' % len(passed))
    subject = ', '.join(counts) + ' in {suite}'.format(suite=name)

    body = email_templates['body_templ'].format(
        log_root=get_http_log_path(archive_dir, ''),
        fail_count=len(failed),
        hung_count=len(hung),
        pass_count=len(passed),
        fail_sect=render_section('Failed', failed),
        hung_sect=render_section('Hung', hung),
        pass_sect=render_section('Passed', passed),
    )

    return (subject.strip(), body.strip())
def get_arch(config):
for yamlfile in config:
y = yaml.safe_load(file(yamlfile))
@ -445,6 +544,7 @@ def get_arch(config):
return arch
return None
def get_os_type(configs):
for config in configs:
yamlfile = config[2]
@ -456,6 +556,7 @@ def get_os_type(configs):
return os_type
return None
def get_exclude_arch(configs):
for config in configs:
yamlfile = config[2]
@ -467,6 +568,7 @@ def get_exclude_arch(configs):
return exclude_arch
return None
def get_exclude_os_type(configs):
for config in configs:
yamlfile = config[2]
@ -478,6 +580,7 @@ def get_exclude_os_type(configs):
return exclude_os_type
return None
def get_machine_type(config):
for yamlfile in config:
y = yaml.safe_load(file(yamlfile))