mirror of
https://github.com/ceph/ceph
synced 2025-01-31 07:22:56 +00:00
734580d546
Signed-off-by: Zack Cerza <zack.cerza@inktank.com>
611 lines
22 KiB
Python
611 lines
22 KiB
Python
import argparse
|
|
import json
|
|
import logging
|
|
import subprocess
|
|
import yaml
|
|
import re
|
|
import collections
|
|
import os
|
|
import time
|
|
import requests
|
|
import urllib
|
|
from distutils.spawn import find_executable
|
|
|
|
import teuthology
|
|
from . import misc
|
|
from . import provision
|
|
from .config import config
|
|
from .lockstatus import get_status
|
|
|
|
# Module-wide logger, named after this module.
log = logging.getLogger(__name__)

# Don't need to see connection pool INFO messages
logging.getLogger(
    "requests.packages.urllib3.connectionpool"
).setLevel(logging.WARNING)


def is_vpm(name):
    """
    Return True when the substring 'vpm' occurs anywhere in name.
    """
    return 'vpm' in name
|
|
|
|
def get_distro_from_downburst():
    """
    Return a table of valid distros.

    The table maps an os_type name to a list of os_version strings.

    If the downburst executable is found on the path, the parsed output of
    ``downburst list-json`` is returned.  If downburst is not found, cannot
    be run, exits with a non-zero status, or prints something that is not
    valid JSON, a built-in default table is returned instead.
    """
    default_table = {u'rhel_minimal': [u'6.4', u'6.5'],
                     u'fedora': [u'17', u'18', u'19', u'20'],
                     u'centos': [u'6.3', u'6.4', u'6.5', u'7.0'],
                     u'opensuse': [u'12.2'],
                     u'rhel': [u'6.3', u'6.4', u'6.5', u'7.0', u'7beta'],
                     u'centos_minimal': [u'6.4', u'6.5'],
                     u'ubuntu': [u'8.04(hardy)', u'9.10(karmic)',
                                 u'10.04(lucid)', u'10.10(maverick)',
                                 u'11.04(natty)', u'11.10(oneiric)',
                                 u'12.04(precise)', u'12.10(quantal)',
                                 u'13.04(raring)', u'13.10(saucy)',
                                 u'14.04(trusty)', u'utopic(utopic)'],
                     u'sles': [u'11-sp2'],
                     u'debian': [u'6.0', u'7.0']}
    executable_cmd = find_executable('downburst')
    if executable_cmd:
        try:
            output = subprocess.check_output([executable_cmd, 'list-json'])
            return json.loads(output)
        except (subprocess.CalledProcessError, OSError, ValueError):
            # CalledProcessError: downburst exited non-zero (e.g. a version
            # without list-json).  OSError: it could not be executed.
            # ValueError: its output was not parseable JSON.
            # In every case fall through to the default table below.
            pass
    log.info('Using default values for supported os_type/os_version')
    return default_table
|
|
|
|
|
|
def vps_version_or_type_valid(machine_type, os_type, os_version):
    """
    Validate the os-type / os-version pair requested for a vps.

    Any machine_type other than 'vps' is accepted without further checks.
    For 'vps', os_type must be a key of the table returned by
    get_distro_from_downburst() and os_version must be accepted by
    validate_distro_version() for that os_type.  An error is logged for
    whichever check fails.

    Returns True when the combination is acceptable, False otherwise.
    """
    is_vps = machine_type == 'vps'
    if not is_vps:
        return True

    distro_table = get_distro_from_downburst()
    if os_type not in distro_table:
        log.error('os-type is invalid')
        return False

    version_ok = validate_distro_version(os_version, distro_table[os_type])
    if not version_ok:
        log.error("os-version '%s' is invalid", os_version)
        return False
    return True
|
|
|
|
def validate_distro_version(version, supported_versions):
    """
    Return True if the version is valid, False otherwise.

    A version is valid when it equals one of the entries of
    supported_versions exactly.  Entries of the form '12.04(precise)' or
    '12.04 (precise)' (as used for Ubuntu) are additionally matched by
    either the part before the parenthesis ('12.04') or the name inside it
    ('precise').
    """
    if version in supported_versions:
        return True
    for entry in supported_versions:
        pieces = entry.split('(')
        if len(pieces) != 2:
            # Not of the 'number(name)' form; only an exact match counts.
            continue
        # Strip so that both '12.04(precise)' and '12.04 (precise)' work.
        number = pieces[0].strip()
        # Drop the final character, which is the closing parenthesis.
        codename = pieces[1].strip()[:-1]
        if version == number or version == codename:
            return True
    # Be explicit rather than falling off the end and returning None.
    return False
|
|
|
|
def main(ctx):
    """
    Run the lock operation selected by the attributes of ctx.

    After reading the config and collecting machine names from
    ctx.machines and (optionally) the YAML file named by ctx.targets, the
    option combination is checked with assertions and exactly one of the
    following branches runs, tested in this order:

    * ctx.brief / ctx.list / ctx.list_targets: fetch node statuses, filter
      them by the ctx.machine_type, ctx.owner, ctx.status, ctx.locked,
      ctx.desc and ctx.desc_pattern settings and print them;
    * ctx.summary: print a summary via do_summary();
    * ctx.lock: lock each named machine;
    * ctx.unlock: unlock each named machine;
    * ctx.num_to_lock: lock that many machines of ctx.machine_type;
    * ctx.update: update description and/or status of the named machines.

    Returns 0 on success and 1 when something failed.

    Raises AssertionError for unsupported option combinations and
    argparse.ArgumentTypeError when the targets file cannot be read.
    """
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    misc.read_config(ctx)

    ret = 0
    user = ctx.owner
    machines = [misc.canonicalize_hostname(m, user=False)
                for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                for new in yaml.safe_load_all(f):
                    # An empty YAML document loads as None; skip it.
                    if new and 'targets' in new:
                        machines.extend(new['targets'])
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update, \
            'machines cannot be specified with that operation'
    else:
        assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
            ctx.summary or ctx.brief, \
            'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets or ctx.brief, \
            '--all can only be used with --list, --list-targets, and --brief'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief or ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list/--brief'

        if machines:
            statuses = []
            for machine in machines:
                machine = misc.canonicalize_hostname(machine)
                status = get_status(machine)
                if status:
                    statuses.append(status)
                else:
                    log.error("Lockserver doesn't know about machine: %s" %
                              machine)
        else:
            statuses = list_locks()

        # list_locks() returns None on failure; treat that as "nothing to
        # scan" here so the error branch at the bottom is reached instead
        # of crashing while iterating None.
        vmachines = []
        for vmachine in statuses or []:
            if vmachine['vm_host']:
                if vmachine['locked']:
                    vmachines.append(vmachine['name'])
        if vmachines:
            # Avoid ssh-keyscans for everybody when listing all machines
            # Listing specific machines will update the keys.
            if machines:
                do_update_keys(vmachines)
                # Re-read the statuses so updated keys are shown.  Unknown
                # machines (already reported above) are left out, otherwise
                # the filters below would subscript None.
                refreshed = (get_status(name) for name in machines)
                statuses = [status for status in refreshed if status]
            else:
                statuses = list_locks()
        if statuses:
            if ctx.machine_type:
                statuses = [_status for _status in statuses
                            if _status['machine_type'] == ctx.machine_type]
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            if ctx.owner is not None:
                statuses = [_status for _status in statuses
                            if _status['locked_by'] == ctx.owner]
            if ctx.status is not None:
                statuses = [_status for _status in statuses
                            if _status['up'] == (ctx.status == 'up')]
            if ctx.locked is not None:
                statuses = [_status for _status in statuses
                            if _status['locked'] == (ctx.locked == 'true')]
            if ctx.desc is not None:
                statuses = [_status for _status in statuses
                            if _status['description'] == ctx.desc]
            if ctx.desc_pattern is not None:
                statuses = [_status for _status in statuses
                            if _status['description'] is not None and
                            _status['description'].find(ctx.desc_pattern) >= 0]

            # When listing, only show the vm_host's name, not every detail
            for s in statuses:
                if not s.get('is_vm', False):
                    continue
                # A missing or empty vm_host must not raise here.
                vm_host_name = (s.get('vm_host') or dict()).get('name')
                if vm_host_name:
                    s['vm_host'] = vm_host_name
            if ctx.list:
                print(json.dumps(statuses, indent=4))

            elif ctx.brief:
                for s in sorted(statuses, key=lambda s: s.get('name')):
                    locked = "un" if s['locked'] == 0 else " "
                    mo = re.match(r'\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
                    print('{host} {locked}locked {owner} "{desc}"'.format(
                        locked=locked, host=host,
                        owner=s['locked_by'], desc=s['description']))

            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['ssh_pub_key']
                print(yaml.safe_dump(frag, default_flow_style=False))
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        if not vps_version_or_type_valid(ctx.machine_type, ctx.os_type,
                                         ctx.os_version):
            log.error('Invalid os-type or version detected -- lock failed')
            return 1
        for machine in machines:
            if not lock_one(machine, user, ctx.desc):
                ret = 1
                # Without -f the first failure aborts the whole run.
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
                provision.create_if_vm(ctx, machine)
    elif ctx.unlock:
        if ctx.owner is None and user is None:
            user = misc.get_user()
        # If none of them are vpm, do them all in one shot
        if not any(is_vpm(name) for name in machines):
            res = unlock_many(machines, user)
            return 0 if res else 1
        for machine in machines:
            if not unlock_one(ctx, machine, user):
                ret = 1
                # Without -f the first failure aborts the whole run.
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
    elif ctx.num_to_lock:
        result = lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user,
                           ctx.desc, ctx.os_type, ctx.os_version)
        if not result:
            ret = 1
        else:
            machines_to_update = list(result)
            if ctx.machine_type == 'vps':
                shortnames = ' '.join(
                    [misc.decanonicalize_hostname(name) for name in
                     result.keys()]
                )
                if len(result) < ctx.num_to_lock:
                    # Fewer VMs than requested: release what we got.
                    log.error("Locking failed.")
                    for machn in result:
                        unlock_one(ctx, machn)
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n" % shortnames)
                    log.info(
                        "Unable to display keys at this time (virtual " +
                        "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.",
                        shortnames)
            else:
                print(yaml.safe_dump(
                    dict(targets=result),
                    default_flow_style=False))
    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

        if ctx.desc is not None or ctx.status is not None:
            for machine in machines_to_update:
                update_lock(machine, ctx.desc, ctx.status)

    return ret
|
|
|
|
|
|
def lock_many(ctx, num, machine_type, user=None, description=None,
              os_type=None, os_version=None, arch=None):
    """
    Ask the lock server to lock num machines of the given type(s).

    machine_type is expanded with misc.get_multi_machine_types().  All
    non-vps types are requested together in one query (joined with '|');
    'vps', when present, is requested in a query of its own.  The queries
    are tried in order and the first one that succeeds determines the
    result.  For a successful 'vps' query every locked node is created via
    provision.create_if_vm(); nodes that cannot be created are unlocked
    again and left out of the result.

    Returns a dict mapping canonicalized hostname to ssh_pub_key on
    success, an empty list when no query succeeded, and None when the
    os-type/os-version validation fails.
    """
    if user is None:
        user = misc.get_user()

    # NOTE(review): validation reads machine_type/os_type/os_version from
    # ctx rather than from this function's arguments -- confirm intended.
    valid = vps_version_or_type_valid(ctx.machine_type, ctx.os_type,
                                      ctx.os_version)
    if not valid:
        log.error('Invalid os-type or version detected -- lock failed')
        return

    # In the for loop below we can safely query for all bare-metal machine_type
    # values at once. So, if we're being asked for 'plana,mira,burnupi', do it
    # all in one shot. If we are passed 'plana,mira,burnupi,vps', do one query
    # for 'plana,mira,burnupi' and one for 'vps'
    requested_types = misc.get_multi_machine_types(machine_type)
    if requested_types == ['vps']:
        queries = requested_types
    elif 'vps' in requested_types:
        bare_metal = list(requested_types)
        bare_metal.remove('vps')
        queries = ['|'.join(bare_metal), 'vps']
    else:
        queries = ['|'.join(requested_types)]

    for type_query in queries:
        uri = os.path.join(config.lock_server, 'nodes', 'lock_many', '')
        payload = dict(
            locked_by=user,
            count=num,
            machine_type=type_query,
            description=description,
        )
        # Only query for os_type/os_version if non-vps, since in that case we
        # just create them.
        if type_query != 'vps':
            if os_type:
                payload['os_type'] = os_type
            if os_version:
                payload['os_version'] = os_version
        if arch:
            payload['arch'] = arch
        log.debug("lock_many request: %s", repr(payload))
        response = requests.post(
            uri,
            data=json.dumps(payload),
            headers={'content-type': 'application/json'},
        )

        if not response.ok:
            # Report the failure and move on to the next query, if any.
            if response.status_code == 503:
                log.error('Insufficient nodes available to lock %d %s nodes.',
                          num, type_query)
                log.error(response.text)
            else:
                log.error('Could not lock %d %s nodes, reason: unknown.',
                          num, type_query)
            continue

        locked = dict()
        for node in response.json():
            locked[misc.canonicalize_hostname(node['name'])] = \
                node['ssh_pub_key']
        log.debug('locked {machines}'.format(
            machines=', '.join(locked.keys())))

        if type_query != 'vps':
            return locked

        created = {}
        for hostname in locked:
            if provision.create_if_vm(ctx, hostname):
                created[hostname] = locked[hostname]
            else:
                log.error('Unable to create virtual machine: %s',
                          hostname)
                unlock_one(ctx, hostname)
        return created
    return []
|
|
|
|
|
|
def lock_one(name, user=None, description=None):
    """
    Lock a single node on the lock server.

    name is canonicalized first; user defaults to misc.get_user().  A PUT
    carrying the lock request is sent to <lock_server>/nodes/<name>/lock/.
    On failure the 'message' field of the JSON reply (or the HTTP status
    code when the reply is not JSON) is logged as the reason.

    Returns the response object of the PUT request in both cases.
    """
    name = misc.canonicalize_hostname(name, user=None)
    if user is None:
        user = misc.get_user()

    payload = {
        'name': name,
        'locked': True,
        'locked_by': user,
        'description': description,
    }
    uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(uri, json.dumps(payload))

    if response.ok:
        log.debug('locked %s as %s', name, user)
        return response

    try:
        reason = response.json().get('message')
    except ValueError:
        # The body was not JSON; fall back to the status code.
        reason = str(response.status_code)
    log.error('failed to lock {node}. reason: {reason}'.format(
        node=name, reason=reason))
    return response
|
|
|
|
|
|
def unlock_many(names, user):
    """
    Unlock several nodes with a single request.

    Every name is canonicalized and the list is POSTed, together with the
    owner, to <lock_server>/nodes/unlock_many/.  The outcome is logged.

    Returns the 'ok' flag of the response.
    """
    canonical = []
    for raw_name in names:
        canonical.append(misc.canonicalize_hostname(raw_name, user=None))

    uri = os.path.join(config.lock_server, 'nodes', 'unlock_many', '')
    payload = {'locked_by': user, 'names': canonical}
    response = requests.post(
        uri,
        data=json.dumps(payload),
        headers={'content-type': 'application/json'},
    )

    joined = ', '.join(canonical)
    if response.ok:
        log.debug("Unlocked: %s", joined)
    else:
        log.error("Failed to unlock: %s", joined)
    return response.ok
|
|
|
|
|
|
def unlock_one(ctx, name, user=None):
    """
    Unlock a single node on the lock server.

    user defaults to misc.get_user(); name is canonicalized.  A PUT with
    locked=False is sent to <lock_server>/nodes/<name>/lock/.  When the
    request succeeds, provision.destroy_if_vm() is called for the node and
    a failure of that call is logged.  When the request fails, the
    'message' field of the JSON reply (or the HTTP status code when the
    reply is not JSON) is logged as the reason.

    Returns True if the unlock request succeeded, False otherwise.
    """
    if user is None:
        user = misc.get_user()
    name = misc.canonicalize_hostname(name, user=None)

    payload = {
        'name': name,
        'locked': False,
        'locked_by': user,
        'description': None,
    }
    uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(uri, json.dumps(payload))
    success = response.ok

    if not success:
        try:
            reason = response.json().get('message')
        except ValueError:
            # The body was not JSON; fall back to the status code.
            reason = str(response.status_code)
        log.error('failed to unlock {node}. reason: {reason}'.format(
            node=name, reason=reason))
        return success

    log.debug('unlocked %s', name)
    destroyed = provision.destroy_if_vm(ctx, name)
    if not destroyed:
        log.error('downburst destroy failed for %s', name)
        log.info('%s is not locked' % name)
    return success
|
|
|
|
|
|
def list_locks(keyed_by_name=False, **kwargs):
    """
    Fetch node records from <lock_server>/nodes/.

    Keyword arguments are URL-encoded into the query string; commas in a
    'machine_type' value are replaced by '|' first.

    Returns the decoded JSON reply as-is, or, when keyed_by_name is true,
    a dict mapping each node's 'name' to its record.  Returns None when
    the connection fails or the response is not ok.
    """
    uri = os.path.join(config.lock_server, 'nodes', '')
    if kwargs:
        if 'machine_type' in kwargs:
            kwargs['machine_type'] = kwargs['machine_type'].replace(',','|')
        uri += '?' + urllib.urlencode(kwargs)

    try:
        response = requests.get(uri)
    except requests.ConnectionError:
        return None

    if not response.ok:
        return None

    if keyed_by_name:
        return dict((node['name'], node) for node in response.json())
    return response.json()
|
|
|
|
|
|
def update_lock(name, description=None, status=None, ssh_pub_key=None):
    """
    Update description, status and/or ssh public key of a node.

    name is canonicalized first.  If the lock server reports the node as a
    VM, this blocks (sleeping 10 seconds between attempts, with no upper
    bound) until ssh_keyscan() returns a key for it; the scanned key
    itself is not used.  Only the fields whose argument is not None are
    sent, in a PUT to <lock_server>/nodes/<name>/.

    status is compared against the string 'up' to produce the boolean
    'up' field.

    Returns the 'ok' flag of the PUT response, or True when there was
    nothing to update.
    """
    name = misc.canonicalize_hostname(name, user=None)
    status_info = get_status(name)
    # get_status() yields a falsy value for nodes the lock server does not
    # know; do not subscript it in that case, just attempt the update and
    # let the server's reply decide the result.
    if status_info and status_info.get('is_vm'):
        ssh_key = None
        while not ssh_key:
            time.sleep(10)
            ssh_key = ssh_keyscan([name])

    updated = {}
    if description is not None:
        updated['description'] = description
    if status is not None:
        updated['up'] = (status == 'up')
    if ssh_pub_key is not None:
        updated['ssh_pub_key'] = ssh_pub_key

    if updated:
        uri = os.path.join(config.lock_server, 'nodes', name, '')
        response = requests.put(
            uri,
            json.dumps(updated))
        return response.ok
    return True
|
|
|
|
|
|
def update_inventory(node_dict):
    """
    Push a node record to the lock server without any extra processing.

    The record is PUT to <lock_server>/nodes/<name>/; if the server
    answers 404 it is POSTed to <lock_server>/nodes/ instead.  A failure
    of the final request is logged.

    Returns the 'ok' flag of the last response, or None when no lock
    server is configured.  Raises ValueError when node_dict has no
    (non-empty) 'name'.
    """
    name = node_dict.get('name')
    if not name:
        raise ValueError("must specify name")
    if not config.lock_server:
        return

    log.info("Updating %s on lock server", name)
    node_uri = os.path.join(config.lock_server, 'nodes', name, '')
    response = requests.put(node_uri, json.dumps(node_dict))

    if response.status_code == 404:
        # Unknown node: create it via the collection URI.
        log.info("Creating new node %s on lock server", name)
        collection_uri = os.path.join(config.lock_server, 'nodes', '')
        response = requests.post(collection_uri, json.dumps(node_dict))

    if not response.ok:
        log.error("Node update/creation failed for %s: %s",
                  name, response.text)
    return response.ok
|
|
|
|
|
|
def ssh_keyscan(hostnames):
    """
    Fetch the SSH public key of one or more hosts

    hostnames must be a list (or other non-string iterable) of host
    names; each is canonicalized before ``ssh-keyscan -t rsa`` is run on
    all of them at once.  Lines that ssh-keyscan writes to stderr are
    logged as errors unless they start with '#'.

    Returns a dict mapping the host field of each output line to the rest
    of that line.  An empty hostnames list yields an empty dict without
    running ssh-keyscan.

    Raises TypeError when hostnames is a string.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str
    if isinstance(hostnames, string_types):
        raise TypeError("'hostnames' must be a list")
    hostnames = [misc.canonicalize_hostname(name, user=None) for name in
                 hostnames]
    if not hostnames:
        # ssh-keyscan without hosts only prints its usage message.
        return dict()
    args = ['ssh-keyscan', '-t', 'rsa'] + hostnames
    p = subprocess.Popen(
        args=args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text mode, so the lines are str objects.
        universal_newlines=True,
    )
    # communicate() drains both pipes while waiting.  Calling wait() first
    # can deadlock once the child fills a pipe buffer.
    out, err = p.communicate()

    keys_dict = dict()
    for line in err.splitlines():
        if not line.startswith('#'):
            log.error(line)
    for line in out.splitlines():
        fields = line.strip().split(' ', 1)
        if len(fields) != 2:
            # Blank or malformed line: nothing to record.
            continue
        host, key = fields
        keys_dict[host] = key
    return keys_dict
|
|
|
|
|
|
def updatekeys(args):
    """
    Command entry point: refresh stored SSH keys of machines.

    args is a mapping with the keys '--verbose', '--all', '<machine>' and
    '--targets'.  Logging is configured at DEBUG level when '--verbose' is
    set and at INFO level otherwise.  The machines to scan are, in order
    of precedence: all machines known to the lock server ('--all'), the
    names given in '<machine>', or the keys of the 'targets' mapping of
    every document in the YAML file named by '--targets'.

    Returns the result of do_update_keys().
    """
    loglevel = logging.DEBUG if args['--verbose'] else logging.INFO
    logging.basicConfig(
        level=loglevel,
    )
    all_ = args['--all']
    # Always bound, so that no selector (or a targets file without any
    # document) does not end in an UnboundLocalError below.
    machines = []
    if all_:
        # do_update_keys() takes the full list from the lock server.
        pass
    elif args['<machine>']:
        machines = [misc.canonicalize_hostname(m, user=None)
                    for m in args['<machine>']]
    elif args['--targets']:
        targets = args['--targets']
        with open(targets) as f:
            for doc in yaml.safe_load_all(f):
                # An empty YAML document loads as None; skip it.  Collect
                # the targets of every document, not just the last one.
                if doc:
                    machines.extend(doc.get('targets', dict()))

    return do_update_keys(machines, all_)
|
|
|
|
|
|
def do_update_keys(machines, all_=False):
    """
    Scan the SSH keys of machines and push changed keys to the lock server.

    The current records are fetched with list_locks(keyed_by_name=True).
    When all_ is true, machines is ignored and every node name from those
    records is scanned instead.

    Returns the result of push_new_keys(), or 1 when the records could
    not be fetched.
    """
    reference = list_locks(keyed_by_name=True)
    if reference is None:
        # list_locks() returns None when the lock server cannot be
        # queried; there is nothing to compare against.
        log.error('error retrieving lock statuses')
        return 1
    if all_:
        machines = list(reference)
    keys_dict = ssh_keyscan(machines)
    return push_new_keys(keys_dict, reference)
|
|
|
|
|
|
def push_new_keys(keys_dict, reference):
    """
    Store scanned SSH keys that differ from the recorded ones.

    keys_dict maps hostname to the scanned public key; reference maps
    hostname to a node record with an 'ssh_pub_key' entry.  For every
    hostname whose scanned key differs from the recorded one,
    update_lock() is called with the new key.

    Returns 0 when every needed update succeeded, 1 otherwise.
    """
    ret = 0
    for hostname, pubkey in keys_dict.items():
        log.info('Checking %s', hostname)
        if reference[hostname]['ssh_pub_key'] == pubkey:
            continue
        log.info('New key found. Updating...')
        if update_lock(hostname, ssh_pub_key=pubkey):
            continue
        log.error('failed to update %s!', hostname)
        ret = 1
    return ret
|
|
|
|
|
|
def do_summary(ctx):
    """
    Print a per-owner, per-machine-type summary of the nodes.

    Nodes are fetched with list_locks(), restricted to ctx.machine_type
    when that is set.  They are grouped by (owner, machine type), where
    the owner of a node that is not locked is shown as '(free)'.  For each
    group the number of nodes and the number of nodes that are up is
    printed, sorted by machine type and then by node count, followed by
    the totals.

    Logs an error and prints nothing when the nodes cannot be fetched.
    """
    if ctx.machine_type:
        locks = list_locks(machine_type=ctx.machine_type)
    else:
        locks = list_locks()
    if locks is None:
        # list_locks() returns None when the lock server cannot be queried.
        log.error('error retrieving lock statuses')
        return

    # (owner, machine_type) -> [node count, up count, machine type]
    lockd = collections.defaultdict(lambda: [0, 0, 'unknown'])
    for node in locks:
        owner = node['locked_by'] if node['locked'] == 1 else '(free)'
        who = (owner, node['machine_type'])
        lockd[who][0] += 1
        lockd[who][1] += 1 if node['up'] else 0
        lockd[who][2] = node['machine_type']

    rows = sorted(lockd.items(),
                  key=lambda item: (item[1][2], item[1][0]))
    total_count, total_up = 0, 0
    print("TYPE COUNT UP OWNER")

    for (who, (count, upcount, machinetype)) in rows:
        print("{machinetype:8s} {count:3d} {up:3d} {owner}".format(
            count=count, up=upcount, owner=who[0],
            machinetype=machinetype))
        total_count += count
        total_up += upcount

    print(" --- ---")
    print("{cnt:12d} {up:3d}".format(cnt=total_count, up=total_up))
|