import argparse
import collections
import json
import logging
import os
import re
import subprocess
import time
import urllib
from distutils.spawn import find_executable

import requests
import yaml

import teuthology
from . import misc
from . import provision
from .config import config
from .lockstatus import get_status

log = logging.getLogger(__name__)
# Don't need to see connection pool INFO messages
logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
    logging.WARNING)


def is_vpm(name):
    """
    Return True if the substring 'vpm' occurs anywhere in ``name``.
    """
    return 'vpm' in name


def get_distro_from_downburst():
    """
    Return a table of valid distros.

    If downburst is in path use it.  If either downburst is unavailable,
    or if downburst is unable to produce a json list, then use a default
    table.

    :returns: A dict mapping an os_type name to a list of os_version strings.
    """
    default_table = {u'rhel_minimal': [u'6.4', u'6.5'],
                     u'fedora': [u'17', u'18', u'19', u'20'],
                     u'centos': [u'6.3', u'6.4', u'6.5', u'7.0'],
                     u'opensuse': [u'12.2'],
                     u'rhel': [u'6.3', u'6.4', u'6.5', u'7.0', u'7beta'],
                     u'centos_minimal': [u'6.4', u'6.5'],
                     u'ubuntu': [u'8.04(hardy)', u'9.10(karmic)',
                                 u'10.04(lucid)', u'10.10(maverick)',
                                 u'11.04(natty)', u'11.10(oneiric)',
                                 u'12.04(precise)', u'12.10(quantal)',
                                 u'13.04(raring)', u'13.10(saucy)',
                                 u'14.04(trusty)', u'utopic(utopic)'],
                     u'sles': [u'11-sp2'],
                     u'debian': [u'6.0', u'7.0']}
    executable_cmd = find_executable('downburst')
    if not executable_cmd:
        log.info('Using default values for supported os_type/os_version')
        return default_table
    try:
        output = subprocess.check_output([executable_cmd, 'list-json'])
        return json.loads(output)
    except (subprocess.CalledProcessError, OSError, ValueError):
        # ValueError covers output that is not valid JSON, which the
        # docstring promises is handled by falling back to the defaults.
        log.info('Using default values for supported os_type/os_version')
        return default_table


def vps_version_or_type_valid(machine_type, os_type, os_version):
    """
    Check os-type and os-version parameters when locking a vps.

    Os-type will always be set (defaults to ubuntu).

    In the case where downburst does not handle list-json (an older version
    of downburst, for instance), a message is printed and this checking
    is skipped (so that this code should behave as it did before this
    check was added).

    :returns: True if ``machine_type`` is not 'vps' or if the os_type /
              os_version pair is found in the distro table; False otherwise.
    """
    if machine_type != 'vps':
        return True
    valid_os_and_version = get_distro_from_downburst()
    if os_type not in valid_os_and_version:
        log.error('os-type is invalid')
        return False
    if not validate_distro_version(os_version,
                                   valid_os_and_version[os_type]):
        log.error("os-version '%s' is invalid", os_version)
        return False
    return True


def validate_distro_version(version, supported_versions):
    """
    Return True if the version is valid, False otherwise.

    For Ubuntu, supported version values are of the form '12.04(precise)',
    where either the number or the version name is acceptable.

    :param version:            The version string to check.
    :param supported_versions: An iterable of supported version strings.
    """
    if version in supported_versions:
        return True
    for entry in supported_versions:
        pieces = entry.split('(')
        if len(pieces) != 2:
            continue
        number, codename = pieces
        # The codename still carries its closing parenthesis; drop it.
        if version == number or version == codename[:-1]:
            return True
    return False


def main(ctx):
    """
    Run the lock operation selected by the attributes of ``ctx``.

    Exactly one of the listing (``brief``/``list``/``list_targets``),
    ``summary``, ``lock``, ``unlock``, ``num_to_lock`` or ``update`` branches
    is executed, after the option combinations have been validated with
    assertions.  Afterwards, if a description or status was given, every
    machine collected in ``machines_to_update`` is passed to update_lock().

    :param ctx: Parsed arguments object.
    :returns:   0 on success, 1 if any operation failed.
    :raises argparse.ArgumentTypeError: if the targets file cannot be read.
    :raises AssertionError: on an unsupported combination of options.
    """
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    misc.read_config(ctx)

    ret = 0
    user = ctx.owner
    machines = [misc.canonicalize_hostname(m, user=False)
                for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                for new in yaml.safe_load_all(f):
                    if 'targets' in new:
                        machines.extend(new['targets'])
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update, \
            'machines cannot be specified with that operation'
    else:
        assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
            ctx.summary or ctx.brief, \
            'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets or ctx.brief, \
            '--all can only be used with --list, --list-targets, and --brief'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief or ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list/--brief'

        if machines:
            statuses = []
            for machine in machines:
                machine = misc.canonicalize_hostname(machine)
                status = get_status(machine)
                if status:
                    statuses.append(status)
                else:
                    log.error("Lockserver doesn't know about machine: %s",
                              machine)
            # Delete this variable to avoid linter errors when we redefine it
            # in a list comprehension below
            del machine
        else:
            statuses = list_locks()

        # list_locks() returns None on failure; iterate an empty list in that
        # case so that the error branch at the bottom is actually reached.
        vmachines = [vmachine['name'] for vmachine in statuses or []
                     if vmachine['vm_host'] and vmachine['locked']]
        if vmachines:
            # Avoid ssh-keyscans for everybody when listing all machines
            # Listing specific machines will update the keys.
            if machines:
                do_update_keys(vmachines)
                # Drop machines the lock server does not know about, as the
                # first pass above does.
                statuses = [_status for _status in
                            [get_status(machine) for machine in machines]
                            if _status]
            else:
                statuses = list_locks()
        if statuses:
            if ctx.machine_type:
                statuses = [_status for _status in statuses
                            if _status['machine_type'] == ctx.machine_type]
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            if ctx.owner is not None:
                statuses = [_status for _status in statuses
                            if _status['locked_by'] == ctx.owner]
            if ctx.status is not None:
                statuses = [_status for _status in statuses
                            if _status['up'] == (ctx.status == 'up')]
            if ctx.locked is not None:
                statuses = [_status for _status in statuses
                            if _status['locked'] == (ctx.locked == 'true')]
            if ctx.desc is not None:
                statuses = [_status for _status in statuses
                            if _status['description'] == ctx.desc]
            if ctx.desc_pattern is not None:
                statuses = [_status for _status in statuses
                            if _status['description'] is not None and
                            _status['description'].find(ctx.desc_pattern) >= 0]

            # When listing, only show the vm_host's name, not every detail
            for s in statuses:
                if not s.get('is_vm', False):
                    continue
                # Tolerate a missing or empty vm_host entry instead of
                # raising on the subscript.
                vm_host_name = (s.get('vm_host') or {}).get('name')
                if vm_host_name:
                    s['vm_host'] = vm_host_name
            if ctx.list:
                print(json.dumps(statuses, indent=4))

            elif ctx.brief:
                for s in sorted(statuses, key=lambda s: s.get('name')):
                    locked = "un" if s['locked'] == 0 else " "
                    mo = re.match(r'\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
                    print('{host} {locked}locked {owner} "{desc}"'.format(
                        locked=locked, host=host,
                        owner=s['locked_by'], desc=s['description']))

            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['ssh_pub_key']
                print(yaml.safe_dump(frag, default_flow_style=False))
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        if not vps_version_or_type_valid(ctx.machine_type, ctx.os_type,
                                         ctx.os_version):
            log.error('Invalid os-type or version detected -- lock failed')
            return 1
        for machine in machines:
            if not lock_one(machine, user, ctx.desc):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
                provision.create_if_vm(ctx, machine)
    elif ctx.unlock:
        if ctx.owner is None and user is None:
            user = misc.get_user()
        # If none of them are vpm, do them all in one shot
        if not any(is_vpm(machine) for machine in machines):
            res = unlock_many(machines, user)
            return 0 if res else 1
        for machine in machines:
            if not unlock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
    elif ctx.num_to_lock:
        result = lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user,
                           ctx.desc, ctx.os_type, ctx.os_version)
        if not result:
            ret = 1
        else:
            machines_to_update = result.keys()
            if ctx.machine_type == 'vps':
                shortnames = ' '.join(
                    [misc.decanonicalize_hostname(name) for name in
                     result.keys()]
                )
                if len(result) < ctx.num_to_lock:
                    log.error("Locking failed.")
                    for machn in result:
                        unlock_one(ctx, machn)
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n", shortnames)
                    log.info(
                        "Unable to display keys at this time (virtual " +
                        "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.",
                        shortnames)
            else:
                print(yaml.safe_dump(
                    dict(targets=result),
                    default_flow_style=False))
    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

    if ctx.desc is not None or ctx.status is not None:
        for machine in machines_to_update:
            update_lock(machine, ctx.desc, ctx.status)

    return ret


def lock_many(ctx, num, machine_type, user=None, description=None,
              os_type=None, os_version=None, arch=None):
    """
    Ask the lock server to lock ``num`` nodes of the given machine type(s).

    One request is sent per entry in the computed machine type list; the
    function returns as soon as one request succeeds.

    :param ctx:          Context object; passed to the provision helpers and
                         read for the os-type/os-version validation.
    :param num:          Number of nodes to lock.
    :param machine_type: Machine type spec, split with
                         misc.get_multi_machine_types().
    :param user:         Lock owner; defaults to misc.get_user().
    :param description:  Description to store with the lock.
    :param os_type:      Sent to the server for non-vps requests only.
    :param os_version:   Sent to the server for non-vps requests only.
    :param arch:         Sent to the server when set.
    :returns: A dict mapping canonical hostname to ssh public key on success
              (for vps, only the machines that could be created); None if
              the os-type/os-version validation fails; an empty list if no
              request succeeded.
    """
    if user is None:
        user = misc.get_user()

    # NOTE(review): validation reads ctx.machine_type / ctx.os_type /
    # ctx.os_version rather than this function's own parameters; confirm
    # against callers before changing.
    if not vps_version_or_type_valid(ctx.machine_type, ctx.os_type,
                                     ctx.os_version):
        log.error('Invalid os-type or version detected -- lock failed')
        return

    # In the for loop below we can safely query for all bare-metal machine_type
    # values at once. So, if we're being asked for 'plana,mira,burnupi', do it
    # all in one shot. If we are passed 'plana,mira,burnupi,vps', do one query
    # for 'plana,mira,burnupi' and one for 'vps'
    machine_types_list = misc.get_multi_machine_types(machine_type)
    if machine_types_list == ['vps']:
        machine_types = machine_types_list
    elif 'vps' in machine_types_list:
        machine_types_non_vps = list(machine_types_list)
        machine_types_non_vps.remove('vps')
        machine_types_non_vps = '|'.join(machine_types_non_vps)
        machine_types = [machine_types_non_vps, 'vps']
    else:
        machine_types_str = '|'.join(machine_types_list)
        machine_types = [machine_types_str, ]

    uri = os.path.join(config.lock_server, 'nodes', 'lock_many', '')
    for machine_type in machine_types:
        data = dict(
            locked_by=user,
            count=num,
            machine_type=machine_type,
            description=description,
        )
        # Only query for os_type/os_version if non-vps, since in that case we
        # just create them.
        if machine_type != 'vps':
            if os_type:
                data['os_type'] = os_type
            if os_version:
                data['os_version'] = os_version
        if arch:
            data['arch'] = arch
        log.debug("lock_many request: %s", repr(data))
        response = requests.post(
            uri,
            data=json.dumps(data),
            headers={'content-type': 'application/json'},
        )
        if response.ok:
            machines = {misc.canonicalize_hostname(machine['name']):
                        machine['ssh_pub_key'] for machine in response.json()}
            log.debug('locked {machines}'.format(
                machines=', '.join(machines.keys())))
            if machine_type == 'vps':
                ok_machs = {}
                for machine in machines:
                    if provision.create_if_vm(ctx, machine):
                        ok_machs[machine] = machines[machine]
                    else:
                        log.error('Unable to create virtual machine: %s',
                                  machine)
                        unlock_one(ctx, machine)
                return ok_machs
            return machines
        elif response.status_code == 503:
            log.error('Insufficient nodes available to lock %d %s nodes.',
                      num, machine_type)
            log.error(response.text)
        else:
            log.error('Could not lock %d %s nodes, reason: unknown.',
                      num, machine_type)
    return []


def lock_one(name, user=None, description=None):
    """
    Lock a single node on the lock server.

    :param name:        Node name; canonicalized before use.
    :param user:        Lock owner; defaults to misc.get_user().
    :param description: Description to store with the lock.
    :returns: The response object of the PUT request (not a plain bool);
              main() tests it for truthiness.
    """
    name = misc.canonicalize_hostname(name, user=None)
    if user is None:
        user = misc.get_user()
    request = dict(name=name, locked=True, locked_by=user,
                   description=description)
    uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(uri, json.dumps(request))
    if response.ok:
        log.debug('locked %s as %s', name, user)
    else:
        try:
            reason = response.json().get('message')
        except ValueError:
            # Body was not JSON; fall back to the HTTP status code.
            reason = str(response.status_code)
        log.error('failed to lock {node}. reason: {reason}'.format(
            node=name, reason=reason))
    return response


def unlock_many(names, user):
    """
    Unlock several nodes with a single request to the lock server.

    :param names: Node names; each is canonicalized before use.
    :param user:  Owner the nodes are locked by.
    :returns:     ``response.ok`` of the POST request.
    """
    names = [misc.canonicalize_hostname(name, user=None) for name in names]
    uri = os.path.join(config.lock_server, 'nodes', 'unlock_many', '')
    data = dict(
        locked_by=user,
        names=names,
    )
    response = requests.post(
        uri,
        data=json.dumps(data),
        headers={'content-type': 'application/json'},
    )
    if response.ok:
        log.debug("Unlocked: %s", ', '.join(names))
    else:
        log.error("Failed to unlock: %s", ', '.join(names))
    return response.ok


def unlock_one(ctx, name, user=None):
    """
    Unlock a single node and, on success, call provision.destroy_if_vm().

    :param ctx:  Context object passed through to provision.destroy_if_vm().
    :param name: Node name; canonicalized before use.
    :param user: Lock owner; defaults to misc.get_user().
    :returns:    ``response.ok`` of the PUT request.
    """
    if user is None:
        user = misc.get_user()
    name = misc.canonicalize_hostname(name, user=None)
    request = dict(name=name, locked=False, locked_by=user, description=None)
    uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(uri, json.dumps(request))
    success = response.ok
    if success:
        log.debug('unlocked %s', name)
        if not provision.destroy_if_vm(ctx, name):
            log.error('downburst destroy failed for %s', name)
            log.info('%s is not locked' % name)
    else:
        try:
            reason = response.json().get('message')
        except ValueError:
            # Body was not JSON; fall back to the HTTP status code.
            reason = str(response.status_code)
        log.error('failed to unlock {node}. reason: {reason}'.format(
            node=name, reason=reason))
    return success


def list_locks(keyed_by_name=False, **kwargs):
    """
    Fetch node records from the lock server.

    :param keyed_by_name: If True, return a dict keyed by each node's 'name'
                          instead of the decoded JSON response as-is.
    :param kwargs:        Sent as query parameters.  In 'machine_type', ','
                          separators are replaced by '|'.
    :returns: The decoded response, or None if the connection failed or the
              response was not ok.
    """
    uri = os.path.join(config.lock_server, 'nodes', '')
    if kwargs:
        if 'machine_type' in kwargs:
            kwargs['machine_type'] = kwargs['machine_type'].replace(',', '|')
        uri += '?' + urllib.urlencode(kwargs)
    try:
        response = requests.get(uri)
    except requests.ConnectionError:
        return None
    if not response.ok:
        return None
    if keyed_by_name:
        return {node['name']: node for node in response.json()}
    return response.json()


def update_lock(name, description=None, status=None, ssh_pub_key=None):
    """
    Update the description, up/down status and/or ssh key of a node.

    If the node's status reports 'is_vm', this first blocks, calling
    ssh_keyscan() every 10 seconds until it returns a non-empty result.

    :param name:        Node name; canonicalized before use.
    :param description: New description, if not None.
    :param status:      'up' marks the node up; any other non-None value
                        marks it down.
    :param ssh_pub_key: New ssh public key, if not None.
    :returns: ``response.ok`` of the PUT request, or True if there was
              nothing to update.
    """
    name = misc.canonicalize_hostname(name, user=None)
    status_info = get_status(name)
    if status_info['is_vm']:
        ssh_key = None
        while not ssh_key:
            time.sleep(10)
            ssh_key = ssh_keyscan([name])
    updated = {}
    if description is not None:
        updated['description'] = description
    if status is not None:
        updated['up'] = (status == 'up')
    if ssh_pub_key is not None:
        updated['ssh_pub_key'] = ssh_pub_key

    if updated:
        uri = os.path.join(config.lock_server, 'nodes', name, '')
        response = requests.put(
            uri,
            json.dumps(updated))
        return response.ok
    return True


def update_inventory(node_dict):
    """
    Like update_lock(), but takes a dict and doesn't try to do anything smart
    by itself

    The node is updated with a PUT; if the server answers 404 the node is
    created with a POST instead.

    :param node_dict: Node description; must contain a non-empty 'name'.
    :returns: ``response.ok`` of the last request, or None if no lock server
              is configured.
    :raises ValueError: if ``node_dict`` has no name.
    """
    name = node_dict.get('name')
    if not name:
        raise ValueError("must specify name")
    if not config.lock_server:
        return
    uri = os.path.join(config.lock_server, 'nodes', name, '')
    log.info("Updating %s on lock server", name)
    response = requests.put(
        uri,
        json.dumps(node_dict))
    if response.status_code == 404:
        log.info("Creating new node %s on lock server", name)
        uri = os.path.join(config.lock_server, 'nodes', '')
        response = requests.post(
            uri,
            json.dumps(node_dict))
    if not response.ok:
        log.error("Node update/creation failed for %s: %s",
                  name, response.text)
    return response.ok


def ssh_keyscan(hostnames):
    """
    Fetch the SSH public key of one or more hosts

    :param hostnames: A list of hostnames; each is canonicalized before use.
    :returns: A dict mapping the host field of each ``ssh-keyscan`` output
              line to the remainder of that line.
    :raises TypeError: if ``hostnames`` is a single string.
    """
    if isinstance(hostnames, basestring):
        raise TypeError("'hostnames' must be a list")
    hostnames = [misc.canonicalize_hostname(name, user=None) for name in
                 hostnames]
    args = ['ssh-keyscan', '-t', 'rsa'] + hostnames
    p = subprocess.Popen(
        args=args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # communicate() drains both pipes while waiting; calling wait() first
    # can deadlock once the child fills a pipe buffer.
    stdout_data, stderr_data = p.communicate()

    for line in stderr_data.splitlines():
        if not line.startswith('#'):
            log.error(line)

    keys_dict = dict()
    for line in stdout_data.splitlines():
        line = line.strip()
        if not line:
            continue
        host, key = line.split(' ', 1)
        keys_dict[host] = key
    return keys_dict


def updatekeys(args):
    """
    Entry point for refreshing the ssh keys stored on the lock server.

    :param args: Mapping of command-line options; the keys '--verbose',
                 '--all', '' and '--targets' are read.
    :returns:    The result of do_update_keys().
    """
    loglevel = logging.DEBUG if args['--verbose'] else logging.INFO
    logging.basicConfig(
        level=loglevel,
    )
    all_ = args['--all']
    # Start from an empty list so that ``machines`` is always bound, even
    # when no selector matches or the targets file has no documents.
    machines = []
    # NOTE(review): the '' key below looks like a placeholder name that was
    # lost somewhere; verify against the command-line usage definition.
    if all_:
        pass
    elif args['']:
        machines = [misc.canonicalize_hostname(m, user=None)
                    for m in args['']]
    elif args['--targets']:
        targets = args['--targets']
        with open(targets) as f:
            # Collect targets from every YAML document, as main() does.
            for doc in yaml.safe_load_all(f):
                machines.extend(doc.get('targets', dict()))

    return do_update_keys(machines, all_)


def do_update_keys(machines, all_=False):
    """
    Scan the ssh keys of ``machines`` and push changed keys to the server.

    :param machines: Hostnames to scan; ignored when ``all_`` is True.
    :param all_:     If True, scan every node known to the lock server.
    :returns: 0 on success, 1 if the node list could not be retrieved or if
              any key update failed.
    """
    reference = list_locks(keyed_by_name=True)
    if reference is None:
        # list_locks() returns None on failure.
        log.error('error retrieving lock statuses')
        return 1
    if all_:
        machines = reference.keys()
    keys_dict = ssh_keyscan(machines)
    return push_new_keys(keys_dict, reference)


def push_new_keys(keys_dict, reference):
    """
    Update the lock server for every host whose scanned key differs.

    :param keys_dict: Dict mapping hostname to freshly scanned public key.
    :param reference: Dict mapping hostname to the node record currently
                      known; its 'ssh_pub_key' entry is compared.
    :returns: 0 if every needed update succeeded, 1 otherwise.
    """
    ret = 0
    for hostname, pubkey in keys_dict.items():
        log.info('Checking %s', hostname)
        if reference[hostname]['ssh_pub_key'] != pubkey:
            log.info('New key found. Updating...')
            if not update_lock(hostname, ssh_pub_key=pubkey):
                log.error('failed to update %s!', hostname)
                ret = 1
    return ret


def do_summary(ctx):
    """
    Print a table of node counts grouped by (owner, machine type).

    Unlocked nodes are grouped under the owner '(free)'.  Rows are sorted by
    machine type and then by count, followed by a totals line.

    :param ctx: Context object; ``ctx.machine_type`` restricts the query
                when set.
    """
    if ctx.machine_type:
        locks = list_locks(machine_type=ctx.machine_type)
    else:
        locks = list_locks()
    if locks is None:
        # list_locks() returns None on failure.
        log.error('error retrieving lock statuses')
        return

    # Maps (owner, machine_type) -> [node count, up count, machine_type]
    lockd = collections.defaultdict(lambda: [0, 0, 'unknown'])
    for l in locks:
        who = (l['locked_by'] if l['locked'] == 1 else '(free)',
               l['machine_type'])
        lockd[who][0] += 1
        lockd[who][1] += 1 if l['up'] else 0
        lockd[who][2] = l['machine_type']

    rows = sorted(lockd.items(),
                  key=lambda item: (item[1][2], item[1][0]))
    total_count, total_up = 0, 0
    print("TYPE COUNT UP OWNER")

    for (owner, (count, upcount, machinetype)) in rows:
        print("{machinetype:8s} {count:3d} {up:3d} {owner}".format(
            count=count, up=upcount, owner=owner[0],
            machinetype=machinetype))
        total_count += count
        total_up += upcount

    print(" --- ---")
    print("{cnt:12d} {up:3d}".format(cnt=total_count, up=total_up))