# ceph/teuthology/lock.py

import argparse
import json
import logging
import subprocess
import yaml
import re
import collections
import os
import time
import requests
import urllib

import teuthology
from . import misc
from . import provision
from .config import config
from .lockstatus import get_status

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Don't need to see connection pool INFO messages
logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
    logging.WARNING)


def main(ctx):
    """
    Validate the option combination on ``ctx`` and run the one requested
    lock operation.

    Exactly one of the following branches runs, checked in this order:
    listing (``brief`` / ``list`` / ``list_targets``), ``summary``, ``lock``,
    ``unlock``, ``num_to_lock`` and ``update``. Afterwards, if a description
    or status was given, it is applied to every machine that was
    successfully locked, unlocked or named for update.

    :param ctx: parsed command-line arguments. ``ctx.owner`` is overwritten
                with the current user when listing without explicit
                machines, owner or ``--all``.
    :returns:   0 on success, 1 if any requested operation failed.
    :raises AssertionError: for an unsupported combination of options.
    :raises argparse.ArgumentTypeError: if the targets file cannot be read.
    """
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    misc.read_config(ctx)

    ret = 0
    user = ctx.owner
    machines = [misc.canonicalize_hostname(m, user=False)
                for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                for new in yaml.safe_load_all(f):
                    # An empty YAML document loads as None; skip it instead
                    # of failing on the membership test.
                    if new and 'targets' in new:
                        machines.extend(new['targets'])
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    # NOTE: these checks are asserts, so they vanish under ``python -O``.
    # They are kept as asserts because callers may rely on AssertionError.
    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update, \
            'machines cannot be specified with that operation'
    else:
        assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
            ctx.summary or ctx.brief, \
            'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets or ctx.brief, \
            '--all can only be used with --list, --list-targets, and --brief'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief or ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list/--brief'

        if machines:
            statuses = []
            for machine in machines:
                status = get_status(machine)
                if status:
                    statuses.append(status)
                else:
                    log.error("Lockserver doesn't know about machine: %s" %
                              machine)
        else:
            statuses = list_locks()

        # list_locks() returns None when the request fails; treat that as
        # "no nodes" here so the error branch below is reached.
        vmachines = [node['name'] for node in (statuses or [])
                     if node['vm_host'] and node['locked']]

        # Avoid ssh-keyscans for everybody when listing all machines.
        # Listing specific machines will update the keys, after which the
        # statuses are fetched again to pick up the new keys.
        if vmachines and machines:
            do_update_keys(vmachines)
            # Unknown machines were already reported above; drop their
            # empty status so the filters below only see real records.
            statuses = [status for status in
                        (get_status(machine) for machine in machines)
                        if status]

        if statuses:
            if ctx.machine_type:
                # NOTE(review): do_summary() reads this field as
                # 'machine_type' -- confirm which key the lock server
                # actually returns.
                statuses = [_status for _status in statuses
                            if _status['type'] == ctx.machine_type]
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            if ctx.owner is not None:
                statuses = [_status for _status in statuses
                            if _status['locked_by'] == ctx.owner]
            if ctx.status is not None:
                statuses = [_status for _status in statuses
                            if _status['up'] == (ctx.status == 'up')]
            if ctx.locked is not None:
                statuses = [_status for _status in statuses
                            if _status['locked'] == (ctx.locked == 'true')]
            if ctx.desc is not None:
                statuses = [_status for _status in statuses
                            if _status['description'] == ctx.desc]
            if ctx.desc_pattern is not None:
                statuses = [_status for _status in statuses
                            if _status['description'] is not None and
                            _status['description'].find(ctx.desc_pattern) >= 0]
            if ctx.list:
                print(json.dumps(statuses, indent=4))

            elif ctx.brief:
                for s in statuses:
                    locked = "un" if s['locked'] == 0 else "  "
                    # Show only the short host part of "user@host.domain".
                    mo = re.match(r'\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
                    print('{host} {locked}locked {owner} "{desc}"'.format(
                        locked=locked, host=host,
                        owner=s['locked_by'], desc=s['description']))

            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['ssh_pub_key']
                print(yaml.safe_dump(frag, default_flow_style=False))
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        for machine in machines:
            if not lock_one(machine, user):
                ret = 1
                # Without -f the first failure aborts the whole run.
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
                provision.create_if_vm(ctx, machine)
    elif ctx.unlock:
        for machine in machines:
            if not unlock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
    elif ctx.num_to_lock:
        result = lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user)
        if not result:
            ret = 1
        else:
            machines_to_update = list(result)
            if ctx.machine_type == 'vps':
                shortnames = ' '.join(
                    [name.split('@')[1].split('.')[0]
                        for name in result]
                )
                if len(result) < ctx.num_to_lock:
                    log.error("Locking failed.")
                    # Roll back as the same user the nodes were locked as.
                    for machn in result:
                        unlock_one(ctx, machn, user)
                    # Nothing stays locked, so there is nothing to update.
                    machines_to_update = []
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n" % shortnames)
                    log.info(
                        "Unable to display keys at this time (virtual " +
                        "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.",
                        shortnames)
            else:
                print(yaml.safe_dump(
                    dict(targets=result),
                    default_flow_style=False))
    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

    if ctx.desc is not None or ctx.status is not None:
        for machine in machines_to_update:
            update_lock(machine, ctx.desc, ctx.status)

    return ret


def lock_many(ctx, num, machinetype, user=None, description=None):
    """
    Ask the lock server to lock ``num`` machines in a single request.

    ``machinetype`` is expanded with ``misc.get_multi_machine_types`` and
    each resulting type is tried in turn; the first type for which the
    server grants the request wins.

    :param ctx:         passed through to the provisioning helpers
    :param num:         number of machines to lock
    :param machinetype: machine type specification to lock
    :param user:        owner to record for the locks; defaults to
                        ``misc.get_user()``
    :param description: optional description sent with the lock request
    :returns: a dict mapping machine name to ssh public key. For ``vps``
              machines only those whose VM could be created are included
              (the others are unlocked again). An empty list if no type
              could be locked.
    """
    machinetypes = misc.get_multi_machine_types(machinetype)
    if user is None:
        user = misc.get_user()
    for mtype in machinetypes:
        uri = os.path.join(config.lock_server, 'nodes', 'lock_many', '')
        response = requests.post(
            uri,
            json.dumps(
                dict(
                    locked_by=user,
                    count=num,
                    machine_type=mtype,
                    description=description,
                ))
        )
        if response.ok:
            machines = {machine['name']: machine['ssh_pub_key']
                        for machine in response.json()}
            log.debug('locked {machines}'.format(
                machines=', '.join(machines.keys())))
            if mtype == 'vps':
                ok_machs = {}
                for machine in machines:
                    if provision.create_if_vm(ctx, machine):
                        ok_machs[machine] = machines[machine]
                    else:
                        log.error('Unable to create virtual machine: %s',
                                  machine)
                        # Release the node as the user it was locked as;
                        # omitting the user would fall back to the current
                        # login, which may differ from the lock owner.
                        unlock_one(ctx, machine, user)
                return ok_machs
            return machines
        elif response.status_code == 503:
            log.error('Insufficient nodes available to lock %d %s nodes.',
                      num, mtype)
            log.error(response.text)
        else:
            log.error('Could not lock %d %s nodes, reason: unknown.',
                      num, mtype)
    # Falsy result: callers in this file only test it for truthiness.
    return []


def lock_one(name, user=None, description=None):
    """
    Lock a single machine on the lock server.

    :param name:        name of the machine to lock
    :param user:        owner to record; defaults to ``misc.get_user()``
    :param description: optional description stored with the lock
    :returns: the response object of the PUT request (not a plain bool).
              Callers in this file only test it for truthiness, which
              requests defines as the response's ``ok`` flag.
    """
    if user is None:
        user = misc.get_user()
    payload = dict(name=name, locked=True, locked_by=user,
                   description=description)
    lock_uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(lock_uri, json.dumps(payload))

    if response.ok:
        log.debug('locked %s as %s', name, user)
        return response

    # Prefer the server's own explanation; fall back to the HTTP status
    # code when the body is not JSON.
    try:
        reason = response.json().get('message')
    except ValueError:
        reason = str(response.status_code)
    message = 'failed to lock {node}. reason: {reason}'.format(
        node=name, reason=reason)
    log.error(message)
    return response


def unlock_one(ctx, name, user=None):
    """
    Unlock a single machine and, if it is a VM, destroy it.

    :param ctx:  passed through to ``provision.destroy_if_vm``
    :param name: name of the machine to unlock
    :param user: owner sent with the request; defaults to
                 ``misc.get_user()``
    :returns: the ``ok`` flag of the lock server's response. A failed VM
              destroy is logged but does not change the return value.
    """
    if user is None:
        user = misc.get_user()
    payload = dict(name=name, locked=False, locked_by=user, description=None)
    lock_uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(lock_uri, json.dumps(payload))
    unlocked = response.ok

    if not unlocked:
        # Prefer the server's own explanation; fall back to the HTTP
        # status code when the body is not JSON.
        try:
            reason = response.json().get('message')
        except ValueError:
            reason = str(response.status_code)
        message = 'failed to unlock {node}. reason: {reason}'.format(
            node=name, reason=reason)
        log.error(message)
        return unlocked

    log.debug('unlocked %s', name)
    destroyed = provision.destroy_if_vm(ctx, name)
    if not destroyed:
        log.error('downburst destroy failed for %s', name)
        log.info('%s is not locked' % name)
    return unlocked


def list_locks(keyed_by_name=False, **kwargs):
    """
    Fetch node records from the lock server.

    :param keyed_by_name: if true, return a dict mapping each node's
                          ``name`` to its record instead of a list
    :param kwargs:        sent to the server as URL query parameters
    :returns: the decoded JSON response (or the name-keyed dict built from
              it), or None if the request was not successful
    """
    uri = os.path.join(config.lock_server, 'nodes', '')
    if kwargs:
        uri = '?'.join((uri, urllib.urlencode(kwargs)))
    response = requests.get(uri)

    if not response.ok:
        return None
    if keyed_by_name:
        return dict((node['name'], node) for node in response.json())
    return response.json()


def update_lock(name, description=None, status=None, ssh_pub_key=None):
    """
    Update the description, up/down status and/or ssh key of a machine.

    For a machine the lock server reports as a VM, this first blocks until
    ``ssh_keyscan`` gets an answer from it, polling every 10 seconds with
    no upper bound. The scanned key itself is not used; the loop only
    waits for the host to respond.

    :param name:        name of the machine to update
    :param description: new description, or None to leave it unchanged
    :param status:      ``'up'`` marks the machine up, any other non-None
                        value marks it down; None leaves it unchanged
    :param ssh_pub_key: new ssh public key, or None to leave it unchanged
    :returns: False if the lock server does not know the machine, otherwise
              the ``ok`` flag of the update request (True when there was
              nothing to update).
    """
    status_info = get_status(name)
    if not status_info:
        # get_status() gives a falsy result for machines the lock server
        # does not know; subscripting it below would raise TypeError.
        log.error("Lockserver doesn't know about machine: %s", name)
        return False

    if status_info['is_vm']:
        # Sleep first, then probe, until the VM answers a keyscan.
        while True:
            time.sleep(10)
            if ssh_keyscan(name):
                break

    updated = {}
    if description is not None:
        updated['description'] = description
    if status is not None:
        updated['up'] = (status == 'up')
    if ssh_pub_key is not None:
        updated['ssh_pub_key'] = ssh_pub_key

    if updated:
        uri = os.path.join(config.lock_server, 'nodes', name, '')
        response = requests.put(
            uri,
            json.dumps(updated))
        return response.ok
    return True


def ssh_keyscan(hostnames):
    """
    Fetch the SSH public key of one or more hosts

    Runs ``ssh-keyscan -t rsa`` on the given hosts and parses its standard
    output.

    :param hostnames: a single hostname string, or an iterable of them
    :returns: a dict mapping the first space-separated field of each output
              line (the host) to the rest of the line (the key). Hosts that
              produced no output line are absent.
    """
    args = ['ssh-keyscan', '-t', 'rsa']
    if isinstance(hostnames, basestring):
        args.append(hostnames)
    else:
        args.extend(hostnames)
    p = subprocess.Popen(
        args=args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # communicate() drains both pipes while waiting for the child. Calling
    # wait() first can deadlock once the child fills either pipe buffer.
    out, _ = p.communicate()

    keys_dict = dict()
    for line in out.splitlines():
        line = line.strip()
        # Skip blank lines and comment lines instead of failing to unpack.
        if not line or line.startswith('#'):
            continue
        host, sep, key = line.partition(' ')
        if not sep:
            continue
        keys_dict[host] = key
    return keys_dict


def updatekeys(ctx):
    """
    Entry point for refreshing the ssh keys stored on the lock server.

    Collects machine names from ``ctx.machines`` and, if given, from the
    ``targets`` mapping of every document in the YAML file ``ctx.targets``,
    then hands them to ``do_update_keys``.

    :param ctx: parsed arguments; reads ``verbose``, ``machines`` and
                ``targets``
    :returns:   the result of ``do_update_keys``
    :raises argparse.ArgumentTypeError: if the targets file cannot be read
    """
    loglevel = logging.DEBUG if ctx.verbose else logging.INFO
    logging.basicConfig(
        level=loglevel,
    )

    misc.read_config(ctx)

    # NOTE(review): main() canonicalizes with user=False while this call
    # uses the default -- confirm the resulting names match the keys of
    # the lock server records and are valid ssh-keyscan arguments.
    machines = [misc.canonicalize_hostname(m) for m in ctx.machines]

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                for new in yaml.safe_load_all(f):
                    # An empty YAML document loads as None; skip it instead
                    # of failing on the membership test.
                    if new and 'targets' in new:
                        machines.extend(new['targets'])
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    return do_update_keys(machines)


def do_update_keys(machines):
    """
    Scan the ssh keys of ``machines`` and push any changed key to the lock
    server.

    :param machines: names of the machines to scan; if empty, every node
                     the lock server lists with ``up=True`` is scanned
    :returns: 0 if every changed key was stored, 1 if the node list could
              not be retrieved or an update failed
    """
    reference = list_locks(keyed_by_name=True, up=True)
    if reference is None:
        # list_locks() returns None when the request fails.
        log.error('error retrieving lock statuses')
        return 1
    if not machines:
        machines = list(reference)
    keys_dict = ssh_keyscan(machines)
    return push_new_keys(keys_dict, reference)


def push_new_keys(keys_dict, reference):
    """
    Store every scanned key that differs from the lock server's record.

    :param keys_dict: dict mapping hostname to freshly scanned public key
    :param reference: dict mapping hostname to the lock server's node
                      record; its ``ssh_pub_key`` field is compared
    :returns: 0 if all needed updates succeeded, 1 if any update failed.
              Hosts missing from ``reference`` are skipped with a warning
              and do not count as failures.
    """
    ret = 0
    for hostname, pubkey in keys_dict.items():
        log.info('Checking %s', hostname)
        node = reference.get(hostname)
        if node is None:
            # A host can be scanned without being in the reference set
            # (callers build it from nodes listed as up); there is
            # nothing to compare against, so skip it.
            log.warning('No lock record to compare for %s; skipping',
                        hostname)
            continue
        if node['ssh_pub_key'] != pubkey:
            log.info('New key found. Updating...')
            if not update_lock(hostname, ssh_pub_key=pubkey):
                log.error('failed to update %s!', hostname)
                ret = 1
    return ret


def do_summary(ctx):
    """
    Print a table of node counts grouped by owner and machine type.

    Unlocked nodes are grouped under the owner ``(free)``. Rows are sorted
    by machine type, then by node count, and followed by a totals line.

    :param ctx: parsed arguments; if ``ctx.machine_type`` is set it is sent
                to the lock server as the ``machine_type`` query parameter
    """
    # Each entry holds [node count, nodes that are up, machine type].
    lockd = collections.defaultdict(lambda: [0, 0, 'unknown'])

    # The type must be passed by keyword: the first positional parameter
    # of list_locks() is keyed_by_name, not a filter.
    if ctx.machine_type:
        nodes = list_locks(machine_type=ctx.machine_type)
    else:
        nodes = list_locks()
    if nodes is None:
        # list_locks() returns None when the request fails.
        log.error('error retrieving lock statuses')
        return

    for node in nodes:
        owner = node['locked_by'] if node['locked'] == 1 else '(free)'
        who = (owner, node['machine_type'])
        lockd[who][0] += 1
        lockd[who][1] += 1 if node['up'] else 0
        lockd[who][2] = node['machine_type']

    rows = sorted(lockd.items(),
                  key=lambda item: (item[1][2], item[1][0]))
    total_count, total_up = 0, 0
    print("TYPE     COUNT  UP  OWNER")

    for (who, (count, upcount, machinetype)) in rows:
        print("{machinetype:8s} {count:3d}  {up:3d}  {owner}".format(
            count=count, up=upcount, owner=who[0],
            machinetype=machinetype))
        total_count += count
        total_up += upcount

    print("         ---  ---")
    print("{cnt:12d}  {up:3d}".format(cnt=total_count, up=total_up))