import argparse
import json
import logging
import subprocess
import urllib
import yaml
import re
import collections
import tempfile
import os
import time

import teuthology
from .config import config
from . import lockstatus as ls
from . import misc
from teuthology.misc import get_distro
from teuthology.misc import get_distro_version

log = logging.getLogger(__name__)


def lock_many(ctx, num, machinetype, user=None, description=None):
    """
    Lock ``num`` machines of the requested type(s) on the lock server.

    ``machinetype`` is expanded with ``misc.get_multi_machine_types`` and
    each resulting type is tried in order; the first successful request
    ends the search.  For the 'vps' type every locked machine is also
    created through ``create_if_vm``; machines that cannot be created are
    unlocked again and left out of the result.

    :param ctx: run context, handed on to the vm helpers
    :param num: number of machines to lock
    :param machinetype: machine type specification
    :param user: lock owner; defaults to ``misc.get_user()``
    :param description: optional description sent with the request
    :returns: the decoded server response (a dict keyed by machine name)
              on success, or an empty list when no type could be locked
    """
    if user is None:
        user = misc.get_user()
    for mtype in misc.get_multi_machine_types(machinetype):
        success, content, status = ls.send_request(
            'POST',
            config.lock_server,
            urllib.urlencode(
                dict(
                    user=user,
                    num=num,
                    machinetype=mtype,
                    desc=description,
                )))
        if success:
            machines = json.loads(content)
            log.debug('locked %s', ', '.join(machines.keys()))
            if mtype == 'vps':
                ok_machs = {}
                for machine in machines:
                    if create_if_vm(ctx, machine):
                        ok_machs[machine] = machines[machine]
                    else:
                        log.error('Unable to create virtual machine: %s',
                                  machine)
                        unlock_one(ctx, machine)
                return ok_machs
            return machines
        if status == 503:
            log.error('Insufficient nodes available to lock %d %s nodes.',
                      num, mtype)
        else:
            log.error('Could not lock %d %s nodes, reason: unknown.',
                      num, mtype)
    return []


def lock_one(ctx, name, user=None, description=None):
    """
    Lock the single machine ``name`` for ``user``.

    :param ctx: run context (unused here, kept for a uniform signature)
    :param name: machine name as known to the lock server
    :param user: lock owner; defaults to ``misc.get_user()``
    :param description: optional description sent with the request
    :returns: the success flag reported by ``ls.send_request``
    """
    if user is None:
        user = misc.get_user()
    success, _, _ = ls.send_request(
        'POST',
        config.lock_server + '/' + name,
        urllib.urlencode(dict(user=user, desc=description)))
    if success:
        log.debug('locked %s as %s', name, user)
    else:
        log.error('failed to lock %s', name)
    return success


def unlock_one(ctx, name, user=None):
    """
    Unlock the machine ``name`` and, if it is a vm, destroy it.

    :param ctx: run context, handed on to ``destroy_if_vm``
    :param name: machine name as known to the lock server
    :param user: lock owner; defaults to ``misc.get_user()``
    :returns: the success flag of the unlock request (a failed vm destroy
              is logged but does not change the return value)
    """
    if user is None:
        user = misc.get_user()
    success, _, _ = ls.send_request(
        'DELETE',
        config.lock_server + '/' + name + '?' +
        urllib.urlencode(dict(user=user)))
    if success:
        log.debug('unlocked %s', name)
        if not destroy_if_vm(ctx, name):
            log.error('downburst destroy failed for %s', name)
            # NOTE(review): placement of this message under the
            # destroy-failure branch is inferred; confirm against history.
            log.info('%s is not locked', name)
    else:
        log.error('failed to unlock %s', name)
    return success


def list_locks():
    """
    Fetch the status of every machine from the lock server.

    :returns: the decoded JSON response, or None if the request failed
    """
    success, content, _ = ls.send_request('GET', config.lock_server)
    if success:
        return json.loads(content)
    return None


def update_lock(ctx, name, description=None, status=None, sshpubkey=None):
    """
    Update description, status and/or ssh public key of machine ``name``.

    If the lock server reports a 'vpshost' for the machine, this first
    blocks until ``ssh-keyscan`` returns output for it, polling every ten
    seconds with no upper bound.

    :returns: the success flag of the PUT request, or True when there was
              nothing to update
    """
    status_info = ls.get_status(ctx, name)
    # get_status may return nothing for an unknown machine; treat that as
    # "not a vm" and let the PUT below report the failure.
    phys_host = status_info['vpshost'] if status_info else None
    if phys_host:
        keyscan_out = ''
        while not keyscan_out:
            time.sleep(10)
            keyscan_out, _ = keyscan_check(ctx, [name])

    updated = {}
    if description is not None:
        updated['desc'] = description
    if status is not None:
        updated['status'] = status
    if sshpubkey is not None:
        updated['sshpubkey'] = sshpubkey

    if not updated:
        return True
    success, _, _ = ls.send_request(
        'PUT',
        config.lock_server + '/' + name,
        body=urllib.urlencode(updated),
        headers={'Content-type': 'application/x-www-form-urlencoded'})
    return success


def canonicalize_hostname(s):
    """
    Return ``s`` as 'ubuntu@<host>.front.sepia.ceph.com'.

    Names that already start with that form are returned unchanged.
    """
    if re.match(r'ubuntu@.*\.front\.sepia\.ceph\.com', s) is None:
        s = 'ubuntu@' + s + '.front.sepia.ceph.com'
    return s


def _load_targets(path):
    """
    Return the machine names listed under 'targets' in the YAML file
    ``path`` (all documents), raising ArgumentTypeError if it is unreadable.
    """
    names = []
    try:
        with open(path) as f:
            for doc in yaml.safe_load_all(f):
                # An empty YAML document loads as None; skip it.
                if doc and 'targets' in doc:
                    names.extend(doc['targets'])
    except IOError as e:
        raise argparse.ArgumentTypeError(str(e))
    return names


def _collect_statuses(ctx, machines):
    """
    Return the statuses to list: those of ``machines`` if given, otherwise
    everything the lock server knows.  May return None if the server
    request fails.
    """
    if not machines:
        # Listing everything: deliberately no ssh-keyscan here.
        return list_locks()

    statuses = []
    for machine in machines:
        status = ls.get_status(ctx, machine)
        if status:
            statuses.append(status)
        else:
            log.error("Lockserver doesn't know about machine: %s", machine)

    vmachines = [s['name'] for s in statuses
                 if s['vpshost'] and s['locked']]
    if vmachines:
        # Listing specific machines refreshes the keys of locked vms, so
        # re-read the statuses afterwards (dropping unknown machines).
        scan_for_locks(ctx, vmachines)
        refreshed = [ls.get_status(ctx, machine) for machine in machines]
        statuses = [s for s in refreshed if s]
    return statuses


def _filter_statuses(ctx, statuses):
    """Apply the --machine-type/--owner/--status/--locked/--desc filters."""
    if ctx.machine_type:
        statuses = [s for s in statuses if s['type'] == ctx.machine_type]
    if ctx.owner is not None:
        statuses = [s for s in statuses if s['locked_by'] == ctx.owner]
    if ctx.status is not None:
        want_up = (ctx.status == 'up')
        statuses = [s for s in statuses if s['up'] == want_up]
    if ctx.locked is not None:
        want_locked = (ctx.locked == 'true')
        statuses = [s for s in statuses if s['locked'] == want_locked]
    if ctx.desc is not None:
        statuses = [s for s in statuses if s['description'] == ctx.desc]
    if ctx.desc_pattern is not None:
        statuses = [s for s in statuses
                    if s['description'] is not None and
                    ctx.desc_pattern in s['description']]
    return statuses


def _print_statuses(ctx, statuses):
    """Print ``statuses`` as --list (brief or JSON) or --list-targets YAML."""
    if not ctx.list:
        frag = {'targets': {}}
        for s in statuses:
            frag['targets'][s['name']] = s['sshpubkey']
        print(yaml.safe_dump(frag, default_flow_style=False))
        return
    if not ctx.brief:
        print(json.dumps(statuses, indent=4))
        return
    for s in statuses:
        # NOTE(review): padding for the locked case is a single space as
        # seen in the reviewed source; confirm intended column alignment.
        locked = "un" if s['locked'] == 0 else " "
        mo = re.match(r'\w+@(\w+?)\..*', s['name'])
        host = mo.group(1) if mo else s['name']
        print('{host} {locked}locked {owner} "{desc}"'.format(
            locked=locked, host=host,
            owner=s['locked_by'], desc=s['description']))


def main(ctx):
    """
    Entry point of the lock command: list, summarize, lock, unlock or
    update machines according to the flags on ``ctx``.

    :returns: 0 on success, 1 if any requested operation failed
    """
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    misc.read_config(ctx)

    ret = 0
    user = ctx.owner
    machines = [canonicalize_hostname(m) for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        machines.extend(_load_targets(ctx.targets))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update, \
            'machines cannot be specified with that operation'
    else:
        assert ctx.num_to_lock or ctx.list or ctx.list_targets or ctx.summary,\
            'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets, \
            '--all can only be used with --list and --list-targets'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief:
        assert ctx.list, '--brief only applies to --list'

    if ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list'

        statuses = _collect_statuses(ctx, machines)
        if statuses:
            # Without explicit machines, --owner or --all, show only the
            # caller's own machines.
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            _print_statuses(ctx, _filter_statuses(ctx, statuses))
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        for machine in machines:
            if not lock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
                create_if_vm(ctx, machine)
    elif ctx.unlock:
        for machine in machines:
            if not unlock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
    elif ctx.num_to_lock:
        result = lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user)
        if not result:
            ret = 1
        else:
            machines_to_update = list(result)
            if ctx.machine_type == 'vps':
                shortnames = ' '.join(
                    [name.split('@')[1].split('.')[0] for name in result]
                )
                if len(result) < ctx.num_to_lock:
                    log.error("Locking failed.")
                    for machn in result:
                        unlock_one(ctx, machn)
                    # Everything was rolled back; there is nothing left to
                    # update (and update_lock would wait on destroyed vms).
                    machines_to_update = []
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n", shortnames)
                    log.info(
                        "Unable to display keys at this time (virtual " +
                        "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.",
                        shortnames)
            else:
                print(yaml.safe_dump(
                    dict(targets=result),
                    default_flow_style=False))
    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

    if ctx.desc is not None or ctx.status is not None:
        for machine in machines_to_update:
            update_lock(ctx, machine, ctx.desc, ctx.status)

    return ret


def updatekeys(ctx):
    """
    Entry point of the key-update command: ssh-keyscan the machines named
    on ``ctx`` (and in ``ctx.targets``) and store changed keys.

    :returns: 0 if every changed key was stored, 1 otherwise
    """
    loglevel = logging.DEBUG if ctx.verbose else logging.INFO
    logging.basicConfig(
        level=loglevel,
    )

    misc.read_config(ctx)

    machines = [canonicalize_hostname(m) for m in ctx.machines]
    if ctx.targets:
        machines.extend(_load_targets(ctx.targets))

    return scan_for_locks(ctx, machines)


def keyscan_check(ctx, machines):
    """
    Run ``ssh-keyscan -t rsa`` against ``machines``.

    Any 'user@' prefix is stripped from each name first.  If ``ctx`` has a
    true ``all`` attribute, every machine known to the lock server is
    scanned instead.  The caller's list is not modified.

    :returns: tuple ``(keyscan stdout, dict of lock status by machine name)``
    """
    locks = list_locks()
    if locks is None:
        log.error('error retrieving lock statuses')
        locks = []
    current_locks = dict((lock['name'], lock) for lock in locks)

    if getattr(ctx, 'all', False):
        machines = current_locks.keys()

    hosts = [machine.rsplit('@', 1)[-1] for machine in machines]
    if not hosts:
        # ssh-keyscan without hosts only prints its usage text.
        return ('', current_locks)

    args = ['ssh-keyscan', '-t', 'rsa']
    args.extend(hosts)
    p = subprocess.Popen(
        args=args,
        stdout=subprocess.PIPE,
    )
    out, _ = p.communicate()
    return (out, current_locks)


def update_keys(ctx, out, current_locks):
    """
    Store every key in keyscan output ``out`` that differs from the key
    recorded in ``current_locks``.

    :returns: 0 if all needed updates succeeded, 1 if any failed
    :raises AssertionError: if a scanned host is not in ``current_locks``
    """
    ret = 0
    for key_entry in out.splitlines():
        # Skip blank lines and '#' comment lines; they carry no key.
        if not key_entry.strip() or key_entry.startswith('#'):
            continue
        hostname, pubkey = key_entry.split(' ', 1)
        # TODO: separate out user
        full_name = 'ubuntu@{host}'.format(host=hostname)
        log.info('Checking %s', full_name)
        assert full_name in current_locks, 'host is not in the database!'
        if current_locks[full_name]['sshpubkey'] != pubkey:
            log.info('New key found. Updating...')
            if not update_lock(ctx, full_name, sshpubkey=pubkey):
                log.error('failed to update %s!', full_name)
                ret = 1
    return ret


def scan_for_locks(ctx, machines):
    """Keyscan ``machines`` and store any changed keys; see update_keys."""
    out, current_locks = keyscan_check(ctx, machines)
    return update_keys(ctx, out, current_locks)


def do_summary(ctx):
    """
    Print a table of machine counts (total and up) per owner and machine
    type, restricted to ``ctx.machine_type`` when that is set.
    """
    all_locks = list_locks()
    if all_locks is None:
        log.error('error retrieving lock statuses')
        return

    # (owner, type) -> [machine count, up count, type]
    lockd = collections.defaultdict(lambda: [0, 0, 'unknown'])
    for l in all_locks:
        if ctx.machine_type and l['type'] != ctx.machine_type:
            continue
        owner = l['locked_by'] if l['locked'] == 1 else '(free)'
        who = (owner, l['type'])
        lockd[who][0] += 1
        lockd[who][1] += l['up']         # up is 1 or 0
        lockd[who][2] = l['type']

    locks = sorted(lockd.items(),
                   key=lambda item: (item[1][2], item[1][0]))
    total_count, total_up = 0, 0
    # NOTE(review): spacing inside these table strings is reproduced as
    # seen in the reviewed source; confirm the intended column alignment.
    print("TYPE COUNT UP OWNER")

    for (owner, (count, upcount, machinetype)) in locks:
        print("{machinetype:8s} {count:3d} {up:3d} {owner}".format(
            count=count, up=upcount, owner=owner[0],
            machinetype=machinetype))
        total_count += count
        total_up += upcount

    print(" --- ---")
    print("{cnt:12d} {up:3d}".format(cnt=total_count, up=total_up))


def decanonicalize_hostname(s):
    """
    Strip 'ubuntu@' and '.front.sepia.ceph.com' from ``s``.

    Names that are not exactly of that form are returned unchanged.
    """
    # Anchored at the end so the suffix slice below only ever removes the
    # real suffix.
    if re.match(r'ubuntu@.*\.front\.sepia\.ceph\.com$', s):
        s = s[len('ubuntu@'): -len('.front.sepia.ceph.com')]
    return s


def _get_downburst_exec():
    """
    First check for downburst in the user's path.
    Then check in ~/src, ~ubuntu/src, and ~teuthology/src.
    Return '' if no executable downburst is found.
    """
    def _is_exec(pth):
        # os.access(X_OK) is also true for searchable directories.
        return os.path.isfile(pth) and os.access(pth, os.X_OK)

    path = os.environ.get('PATH', None)
    if path:
        for p in path.split(os.pathsep):
            pth = os.path.join(p, 'downburst')
            if _is_exec(pth):
                return pth
    import pwd
    little_old_me = pwd.getpwuid(os.getuid()).pw_name
    for user in [little_old_me, 'ubuntu', 'teuthology']:
        pth = "/home/%s/src/downburst/virtualenv/bin/downburst" % user
        if _is_exec(pth):
            return pth
    return ''


def _load_downburst_config(ctx):
    """
    Return downburst settings from the file ``ctx.downburst_conf`` or,
    failing that, from ``ctx.config['downburst']``; {} if neither exists.
    Raises IOError if ``ctx.downburst_conf`` cannot be read.
    """
    try:
        with open(ctx.downburst_conf) as downb_yaml:
            lcnfg = yaml.safe_load(downb_yaml)
        # Accept the settings either bare or under a 'downburst' key.
        if list(lcnfg.keys()) == ['downburst']:
            lcnfg = lcnfg['downburst']
        return lcnfg
    except (TypeError, AttributeError):
        # No usable downburst_conf; fall back to the job config below.
        pass

    lcnfg = {}
    try:
        for tdict in ctx.config['downburst']:
            for key in tdict:
                lcnfg[key] = tdict[key]
    except (KeyError, AttributeError):
        lcnfg = {}
    return lcnfg


#
# Use downburst to create a virtual machine
#
def create_if_vm(ctx, machine_name):
    """
    Create ``machine_name`` with downburst if the lock server reports a
    'vpshost' for it.

    If downburst reports that the guest already exists, the guest is
    destroyed and creation is retried once.

    :returns: False if the machine is unknown or not a vm, if the
              downburst configuration or executable is unavailable, or if
              re-creating an existing guest failed; True otherwise
    """
    status_info = ls.get_status(ctx, machine_name)
    if not status_info:
        return False
    phys_host = status_info['vpshost']
    if not phys_host:
        return False
    os_type = get_distro(ctx)
    os_version = get_distro_version(ctx)

    createMe = decanonicalize_hostname(machine_name)
    try:
        lcnfg = _load_downburst_config(ctx)
    except IOError:
        print("Error reading %s" % ctx.downburst_conf)
        return False

    file_info = {
        'disk-size': lcnfg.get('disk-size', '30G'),
        'ram': lcnfg.get('ram', '1.9G'),
        'cpus': lcnfg.get('cpus', 1),
        'networks': lcnfg.get(
            'networks',
            [{'source': 'front', 'mac': status_info['mac']}]),
        'distro': lcnfg.get('distro', os_type.lower()),
        'distroversion': lcnfg.get('distroversion', os_version),
        'additional-disks': lcnfg.get('additional-disks', 3),
        'additional-disks-size': lcnfg.get('additional-disks-size', '200G'),
        'arch': lcnfg.get('arch', 'x86_64'),
    }

    with tempfile.NamedTemporaryFile() as tmp:
        yaml.safe_dump({'downburst': file_info}, tmp)
        # downburst opens the file by name, so the data must be on disk.
        tmp.flush()
        metadata = "--meta-data=%s" % tmp.name
        dbrst = _get_downburst_exec()
        if not dbrst:
            log.error("No downburst executable found.")
            return False

        for attempt in (1, 2):
            p = subprocess.Popen([dbrst, '-c', phys_host,
                                  'create', metadata, createMe],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,)
            owt, err = p.communicate()
            if err:
                log.info("Downburst completed on %s: %s", machine_name, err)
            else:
                log.info("%s created: %s", machine_name, owt)
            if 'exists' not in err:
                return True
            if attempt == 2:
                break
            # If the guest already exists first destroy then re-create:
            log.info("Guest files exist. Re-creating guest: %s",
                     machine_name)
            if not destroy_if_vm(ctx, machine_name):
                log.error("Unable to destroy existing guest: %s",
                          machine_name)
                return False

    log.error("Guest still exists after re-creating: %s", machine_name)
    return False


#
# Use downburst to destroy a virtual machine
#
def destroy_if_vm(ctx, machine_name):
    """
    Destroy ``machine_name`` with downburst if the lock server reports a
    'vpshost' for it.

    Return False only on vm downburst failures.
    """
    status_info = ls.get_status(ctx, machine_name)
    # Unknown machines and machines without a vpshost need no destroying.
    if not status_info or not status_info['vpshost']:
        return True
    phys_host = status_info['vpshost']
    destroyMe = decanonicalize_hostname(machine_name)
    dbrst = _get_downburst_exec()
    if not dbrst:
        log.error("No downburst executable found.")
        return False
    p = subprocess.Popen([dbrst, '-c', phys_host,
                          'destroy', destroyMe],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,)
    owt, err = p.communicate()
    if err:
        log.error(err)
        return False
    log.info("%s destroyed: %s", machine_name, owt)
    return True