2013-10-16 19:31:26 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import yaml
|
|
|
|
import psutil
|
|
|
|
import subprocess
|
|
|
|
import tempfile
|
2013-10-17 14:42:02 +00:00
|
|
|
import logging
|
2014-04-28 16:12:29 +00:00
|
|
|
import getpass
|
2013-10-16 19:31:26 +00:00
|
|
|
|
2014-06-27 20:34:39 +00:00
|
|
|
from . import beanstalk
|
2014-04-14 20:38:51 +00:00
|
|
|
from . import report
|
2013-10-16 19:31:26 +00:00
|
|
|
from .config import config
|
|
|
|
|
2013-10-17 14:42:02 +00:00
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
2013-10-16 19:31:26 +00:00
|
|
|
|
|
|
|
def main(args):
    """
    Entry point: kill either specific jobs or a whole run.

    :param args: Mapping of command-line options. The keys read here are
                 ``--run``, ``--job``, ``--jobspec``, ``--archive``,
                 ``--owner``, ``--machine_type`` and ``--preserve-queue``.

    When ``--jobspec`` is given it is split on ``/``; its first component
    replaces the run name and its second component becomes the only job
    to kill. If any jobs are selected, each is killed individually;
    otherwise the whole run is killed.
    """
    run_name = args['--run']
    job = args['--job']
    jobspec = args['--jobspec']
    archive_base = args['--archive']
    owner = args['--owner']
    machine_type = args['--machine_type']
    preserve_queue = args['--preserve-queue']

    if jobspec:
        # A jobspec has the form "<run name>/<job id>" and overrides both
        # --run and --job.
        spec_parts = jobspec.split('/')
        run_name, job = spec_parts[0], [spec_parts[1]]

    if not job:
        kill_run(run_name, archive_base, owner, machine_type,
                 preserve_queue=preserve_queue)
        return

    for job_id in job:
        kill_job(run_name, job_id, archive_base, owner, machine_type)
|
2013-10-16 19:31:26 +00:00
|
|
|
|
|
|
|
|
2014-03-25 15:15:41 +00:00
|
|
|
def kill_run(run_name, archive_base=None, owner=None, machine_type=None,
             preserve_queue=False):
    """
    Kill an entire run: dequeue its jobs, kill its processes and nuke the
    machines it has locked.

    :param run_name:       Name of the run to kill.
    :param archive_base:   Base archive directory. When given and
                           ``<archive_base>/<run_name>`` exists, run
                           information is read from it and takes
                           precedence over ``owner``/``machine_type``.
    :param owner:          Owner of the run; used to find and nuke locked
                           machines. Nuking is skipped when this ends up
                           being None.
    :param machine_type:   Passed to remove_beanstalk_jobs() as the tube
                           name.
    :param preserve_queue: When true, leave queued jobs alone (both in
                           beanstalk and in paddles).
    :raises RuntimeError:  If an archive base was given, the run has no
                           archive directory yet, and no machine type was
                           supplied.
    """
    run_info = {}
    serializer = report.ResultsSerializer(archive_base)
    if archive_base:
        run_archive_dir = os.path.join(archive_base, run_name)
        if os.path.isdir(run_archive_dir):
            run_info = find_run_info(serializer, run_name)
            # find_run_info() only sets these keys when at least one job
            # recorded them, so fall back to the caller-supplied values
            # rather than failing with a KeyError.
            machine_type = run_info.get('machine_type', machine_type)
            owner = run_info.get('owner', owner)
        elif machine_type is None:
            # The option is spelled with an underscore: main() reads it
            # from args['--machine_type'].
            raise RuntimeError("The run is still entirely enqueued; " +
                               "you must also pass --machine_type")

    if not preserve_queue:
        remove_beanstalk_jobs(run_name, machine_type)
        remove_paddles_jobs(run_name)
    kill_processes(run_name, run_info.get('pids'))
    if owner is not None:
        targets = find_targets(run_name, owner)
        nuke_targets(targets, owner)
|
2013-12-19 22:12:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def kill_job(run_name, job_id, archive_base=None, owner=None,
             machine_type=None):
    """
    Kill a single job: kill its process and nuke its targets.

    :param run_name:     Name of the run the job belongs to.
    :param job_id:       ID of the job to kill.
    :param archive_base: Base archive directory handed to the results
                         serializer.
    :param owner:        Accepted for signature compatibility; the value
                         is always replaced by the owner recorded in the
                         job's info.
    :param machine_type: Accepted for signature compatibility; not used.
    """
    info = report.ResultsSerializer(archive_base).job_info(run_name, job_id)
    # The owner recorded with the job wins over whatever was passed in.
    owner = info['owner']
    # If the job recorded no pid this passes [None] to kill_processes().
    kill_processes(run_name, [info.get('pid')])
    # nuke_targets() expects a mapping with a 'targets' key.
    nuke_targets({'targets': info.get('targets', {})}, owner)
|
2013-10-16 19:31:26 +00:00
|
|
|
|
|
|
|
|
2014-06-02 23:29:55 +00:00
|
|
|
def find_run_info(serializer, run_name):
    """
    Collect run-level information from the archived jobs of a run.

    :param serializer: Object providing ``jobs_for_run(run_name)`` (a
                       mapping of job id to job directory) and
                       ``job_info(run_name, job_id)``.
    :param run_name:   Name of the run to inspect.
    :returns:          A dict that always has a ``pids`` list (one entry
                       per job whose info has a ``pid``), plus
                       ``machine_type`` and ``owner`` taken from the first
                       job encountered that has each of them.
    """
    log.info("Assembling run information...")
    wanted_fields = ('machine_type', 'owner')

    collected = {}
    job_pids = []
    jobs = serializer.jobs_for_run(run_name)
    for (job_id, job_dir) in jobs.iteritems():
        # Only jobs that already have an archive directory are considered.
        if os.path.isdir(job_dir):
            job_info = serializer.job_info(run_name, job_id)
            for field in job_info.keys():
                # First job to provide a field wins.
                if field in wanted_fields and field not in collected:
                    collected[field] = job_info[field]
            if 'pid' in job_info:
                job_pids.append(job_info['pid'])
    collected['pids'] = job_pids
    return collected
|
2013-10-16 19:31:26 +00:00
|
|
|
|
|
|
|
|
2014-04-14 20:38:51 +00:00
|
|
|
def remove_paddles_jobs(run_name):
    """
    Delete the still-queued jobs of a run from paddles.

    Fetches the run's jobs through ``report.ResultsReporter`` and, if any
    have status ``queued``, asks ``report.try_delete_jobs`` to delete
    them. Does nothing when no job is queued.

    :param run_name: Name of the run whose queued jobs should be removed.
    """
    reporter = report.ResultsReporter()
    queued_ids = []
    for entry in reporter.get_jobs(run_name, fields=['status']):
        if entry['status'] == 'queued':
            queued_ids.append(entry['job_id'])
    if not queued_ids:
        return
    log.info("Deleting jobs from paddles: %s", str(queued_ids))
    report.try_delete_jobs(run_name, queued_ids)
|
|
|
|
|
|
|
|
|
2013-12-19 21:39:15 +00:00
|
|
|
def remove_beanstalk_jobs(run_name, tube_name):
    """
    Delete the ready jobs that belong to a run from a beanstalk tube.

    :param run_name:      Name of the run; compared against the ``name``
                          field of each job's YAML body.
    :param tube_name:     Tube to watch, as understood by
                          ``beanstalk.watch_tube``.
    :raises RuntimeError: If the queue host or port is not configured.
    """
    qhost = config.queue_host
    qport = config.queue_port
    if qhost is None or qport is None:
        raise RuntimeError(
            'Beanstalk queue information not found in {conf_path}'.format(
                conf_path=config.teuthology_yaml))
    log.info("Checking Beanstalk Queue...")
    beanstalk_conn = beanstalk.connect()
    # Always close the connection, even if inspecting a job fails.
    try:
        real_tube_name = beanstalk.watch_tube(beanstalk_conn, tube_name)

        curjobs = beanstalk_conn.stats_tube(
            real_tube_name)['current-jobs-ready']
        if curjobs == 0:
            log.info("No jobs in Beanstalk Queue")
            return

        # Reserve once per ready job so that every job is inspected; the
        # previous counter-based loop stopped one short and skipped the
        # queue entirely when exactly one job was ready.
        # NOTE(review): jobs that do not match stay reserved by this
        # connection; presumably they return to the ready state when the
        # connection is closed -- confirm against the beanstalk client.
        for _ in range(curjobs):
            job = beanstalk_conn.reserve(timeout=20)
            if job is None:
                # Nothing could be reserved within the timeout.
                continue
            job_config = yaml.safe_load(job.body)
            if run_name != job_config['name']:
                continue
            job_id = job.stats()['id']
            msg = (
                "Deleting job from queue. ID: "
                "{id} Name: {name} Desc: {desc}".format(
                    id=str(job_id),
                    name=job_config['name'],
                    desc=job_config['description'],
                )
            )
            log.info(msg)
            job.delete()
    finally:
        beanstalk_conn.close()
|
2013-10-16 19:31:26 +00:00
|
|
|
|
|
|
|
|
2013-12-19 21:39:15 +00:00
|
|
|
def kill_processes(run_name, pids=None):
    """
    Kill the processes belonging to a run.

    :param run_name: Name of the run; used to search for processes when
                     no pids are supplied.
    :param pids:     Optional iterable of pids. When non-empty, only those
                     of them that are currently running are killed
                     (entries that are not live pids, e.g. the None that
                     kill_job() passes for a job without a recorded pid,
                     are dropped by the intersection). When empty or None,
                     find_pids() is used instead.
    """
    if pids:
        to_kill = set(pids).intersection(psutil.pids())
    else:
        to_kill = find_pids(run_name)

    if not to_kill:
        log.info("No teuthology processes running")
        return

    log.info("Killing Pids: " + str(to_kill))
    current_user = getpass.getuser()
    for pid in to_kill:
        try:
            proc_user = psutil.Process(int(pid)).username()
        except psutil.NoSuchProcess:
            # The process exited after the pid list was built; there is
            # nothing left to kill, so move on instead of aborting.
            log.info("Process %s already exited; skipping", pid)
            continue
        args = ['kill', str(pid)]
        # Don't attempt to use sudo if it's not necessary
        if proc_user != current_user:
            args.insert(0, 'sudo')
        subprocess.call(args)
|
2013-10-17 18:02:30 +00:00
|
|
|
|
|
|
|
|
2013-12-19 22:12:56 +00:00
|
|
|
def process_matches_run(pid, run_name):
    """
    Tell whether a process belongs to the given run.

    A process matches when ``run_name`` is one of its command-line
    arguments and this program's own ``sys.argv[0]`` is not, so the
    killing process never matches itself.

    :param pid:      Process id to inspect.
    :param run_name: Name of the run to look for.
    :returns:        True on a match; False otherwise, including when the
                     process no longer exists.
    """
    try:
        cmdline = psutil.Process(pid).cmdline()
    except psutil.NoSuchProcess:
        return False
    return run_name in cmdline and sys.argv[0] not in cmdline
|
|
|
|
|
|
|
|
|
2013-12-19 21:39:15 +00:00
|
|
|
def find_pids(run_name):
    """
    Return the pids of all running processes that match the given run.

    :param run_name: Name of the run to look for.
    :returns:        List of pids for which process_matches_run() is true.
    """
    return [
        candidate for candidate in psutil.pids()
        if process_matches_run(candidate, run_name)
    ]
|
2013-10-16 19:31:26 +00:00
|
|
|
|
|
|
|
|
2013-12-19 21:39:15 +00:00
|
|
|
def find_targets(run_name, owner):
    """
    Ask ``teuthology-lock`` which machines a run has locked.

    Runs ``teuthology-lock --list-targets`` filtered by a description
    pattern of ``/<run_name>/``, status ``up`` and the given owner, and
    parses its standard output as YAML.

    :param run_name: Name of the run whose targets are wanted.
    :param owner:    Owner to filter locked machines by.
    :returns:        The parsed output when it is non-empty and contains
                     ``targets``; otherwise an empty dict.
    """
    desc_pattern = '/' + run_name + '/'
    command = [
        'teuthology-lock',
        '--list-targets',
        '--desc-pattern', desc_pattern,
        '--status', 'up',
        '--owner', owner,
    ]
    lock_proc = subprocess.Popen(command, stdout=subprocess.PIPE)
    # Only stdout is piped; the second element of the pair is unused.
    output = lock_proc.communicate()[0]
    parsed = yaml.safe_load(output)
    if parsed and 'targets' in parsed:
        return parsed
    return {}
|
|
|
|
|
|
|
|
|
2013-12-19 22:12:56 +00:00
|
|
|
def nuke_targets(targets_dict, owner):
    """
    Nuke and unlock the machines listed in ``targets_dict``.

    The mapping is dumped as YAML to a temporary file which is handed to
    ``teuthology-nuke -t <file> --unlock -r --owner <owner>``; the
    command's combined stdout/stderr is relayed to the log line by line.

    :param targets_dict: Mapping with a ``targets`` entry. Each target
                         name is expected to contain an ``@``; the part
                         between the ``@`` and the first following ``.``
                         is what gets logged.
    :param owner:        Owner passed to ``teuthology-nuke``.
    """
    targets = targets_dict.get('targets')
    if not targets:
        log.info("No locked machines. Not nuking anything")
        # Nothing to do: without this return a missing 'targets' entry
        # crashed on iteration and an empty one still ran teuthology-nuke.
        return

    to_nuke = [target.split('@')[1].split('.')[0] for target in targets]

    # delete=False because the file must outlive close() so that the
    # child process can read it; it is removed explicitly below.
    target_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        target_file.write(yaml.safe_dump(targets_dict))
        target_file.close()

        log.info("Nuking machines: " + str(to_nuke))
        nuke_args = [
            'teuthology-nuke',
            '-t',
            target_file.name,
            '--unlock',
            '-r',
            '--owner',
            owner
        ]
        proc = subprocess.Popen(
            nuke_args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT)
        # Relay the command's output as it is produced.
        for line in iter(proc.stdout.readline, ''):
            line = line.replace('\r', '').replace('\n', '')
            log.info(line)
            sys.stdout.flush()
    finally:
        # Remove the temporary file even if writing it or launching the
        # command failed. Closing an already-closed file is a no-op.
        target_file.close()
        os.unlink(target_file.name)
|