mirror of
https://github.com/ceph/ceph
synced 2024-12-25 12:54:16 +00:00
da7483a741
To make IP addresses less likely to change, and to allow a smaller DHCP pool to be used, I generated static MAC addresses for all the vpm entries in the DB. To keep things standardized I also recorded the correct primary (eth0) MAC address for all the other types of machines, which gives us another location where this information is kept. Without this fix, going through a few tests would exhaust the DHCP pool, which at the time held around 460 IP addresses for virtual machines and has since been raised to ~690 IP addresses. Signed-off-by: Sandon Van Ness <sandon@inktank.com> Reviewed-by: Warren Usui <warren.usui@inktank.com>
607 lines
20 KiB
Python
607 lines
20 KiB
Python
import argparse
|
|
import json
|
|
import logging
|
|
import subprocess
|
|
import urllib
|
|
import yaml
|
|
import re
|
|
import collections
|
|
import tempfile
|
|
import os
|
|
import time
|
|
|
|
from teuthology import lockstatus as ls
|
|
from teuthology import misc as teuthology
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
def lock_many(ctx, num, machinetype, user=None, description=None):
    """
    Ask the lock server to lock ``num`` machines of type ``machinetype``.

    :param ctx: context object handed to ``ls._lock_url``.
    :param num: number of machines requested.
    :param machinetype: machine type sent to the server.
    :param user: lock owner; ``teuthology.get_user()`` is used when None.
    :param description: sent to the server as ``desc``.
        NOTE(review): a ``None`` description is URL-encoded as the literal
        text "None" -- confirm the server expects that.
    :returns: the decoded JSON reply (its keys are the locked machine
              names) on success, or an empty list on failure.
    """
    if user is None:
        user = teuthology.get_user()
    url = ls._lock_url(ctx)
    payload = urllib.urlencode(dict(
        user=user,
        num=num,
        machinetype=machinetype,
        desc=description,
    ))
    success, content, status = ls.send_request('POST', url, payload)
    if not success:
        # Only a 503 reply is reported as a shortage of free machines.
        if status == 503:
            log.error('Insufficient nodes available to lock %d nodes.', num)
        else:
            log.error('Could not lock %d nodes, reason: unknown.', num)
        return []
    machines = json.loads(content)
    log.debug('locked {machines}'.format(machines=', '.join(machines.keys())))
    return machines
|
|
|
|
def lock(ctx, name, user=None):
    """
    Lock the single machine ``name`` on the lock server.

    :param ctx: context object handed to ``ls._lock_url``.
    :param name: machine name, appended to the lock URL.
    :param user: lock owner; ``teuthology.get_user()`` is used when None.
    :returns: the success flag reported by ``ls.send_request``.
    """
    owner = teuthology.get_user() if user is None else user
    url = ls._lock_url(ctx) + '/' + name
    success, _, _ = ls.send_request(
        'POST', url, urllib.urlencode(dict(user=owner)))
    if not success:
        log.error('failed to lock %s', name)
    else:
        log.debug('locked %s as %s', name, owner)
    return success
|
|
|
|
def unlock(ctx, name, user=None):
    """
    Release the lock on machine ``name`` and destroy it if it is a VM.

    :param ctx: context object handed to ``ls._lock_url``.
    :param name: machine name, appended to the lock URL.
    :param user: lock owner sent in the query string;
                 ``teuthology.get_user()`` is used when None.
    :returns: the success flag of the DELETE request.  A failed downburst
              destroy is logged but does not change the return value.
    """
    owner = teuthology.get_user() if user is None else user
    query = urllib.urlencode(dict(user=owner))
    url = ls._lock_url(ctx) + '/' + name + '?' + query
    success, _, _ = ls.send_request('DELETE', url)
    if not success:
        log.error('failed to unlock %s', name)
        return success
    log.debug('unlocked %s', name)
    if not destroy_if_vm(ctx, name):
        log.error('downburst destroy failed for %s', name)
    return success
|
|
|
|
def list_locks(ctx):
    """
    Fetch the lock records from the lock server.

    :returns: the decoded JSON reply, or None when the request failed.
    """
    ok, body, _ = ls.send_request('GET', ls._lock_url(ctx))
    return json.loads(body) if ok else None
|
|
|
|
def update_lock(ctx, name, description=None, status=None, sshpubkey=None):
    """
    Update the description, status and/or ssh public key of a lock record.

    When the machine's status record has a ``vpshost`` value, this first
    polls ``keyscan_check`` every 10 seconds until it yields output.
    NOTE(review): that wait has no upper bound -- a host that never answers
    ssh-keyscan blocks this call forever.

    :param ctx: context object handed to the lock-server helpers.
    :param name: machine name.
    :param description: new description (sent as ``desc``), or None.
    :param status: new status, or None.
    :param sshpubkey: new ssh public key, or None.
    :returns: True when there is nothing to update, otherwise the success
              flag of the PUT request.
    """
    if ls.get_status(ctx, name)['vpshost']:
        while True:
            time.sleep(10)
            keyscan_out, _ = keyscan_check(ctx, [name])
            if keyscan_out:
                break

    fields = (
        ('desc', description),
        ('status', status),
        ('sshpubkey', sshpubkey),
    )
    updated = {}
    for key, value in fields:
        if value is not None:
            updated[key] = value

    if not updated:
        return True
    success, _, _ = ls.send_request(
        'PUT', ls._lock_url(ctx) + '/' + name,
        body=urllib.urlencode(updated),
        headers={'Content-type': 'application/x-www-form-urlencoded'})
    return success
|
|
|
|
def _positive_int(string):
|
|
value = int(string)
|
|
if value < 1:
|
|
raise argparse.ArgumentTypeError(
|
|
'{string} is not positive'.format(string=string))
|
|
return value
|
|
|
|
def canonicalize_hostname(s):
    """
    Return ``s`` in ``ubuntu@<host>.front.sepia.ceph.com`` form.

    A name that already starts with that pattern is returned unchanged;
    anything else is wrapped with the ``ubuntu@`` prefix and the
    ``.front.sepia.ceph.com`` suffix.
    """
    if re.match(r'ubuntu@.*\.front\.sepia\.ceph\.com', s) is not None:
        return s
    return 'ubuntu@' + s + '.front.sepia.ceph.com'
|
|
|
|
def main():
    """
    Command-line entry point: lock, unlock, list, summarize or update
    machines on the lock server.

    Exactly one of --list, --list-targets, --lock, --unlock, --lock-many,
    --update or --summary must be given.  Machines come from the positional
    arguments (canonicalized) and/or the ``targets`` section of the YAML
    file named by --targets.

    :returns: 0 on success, 1 if any lock-server operation failed.
    :raises argparse.ArgumentTypeError: if the targets file cannot be read.
    :raises AssertionError: for invalid option combinations.
    """
    parser = argparse.ArgumentParser(description="""
Lock, unlock, or query lock status of machines.
""")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='be more verbose',
        )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--list',
        action='store_true',
        default=False,
        help='Show lock info for machines owned by you, or only machines specified. Can be restricted by --owner, --status, and --locked.',
        )
    group.add_argument(
        '--list-targets',
        action='store_true',
        default=False,
        help='Show lock info for all machines, or only machines specified, in targets: yaml format. Can be restricted by --owner, --status, and --locked.',
        )
    group.add_argument(
        '--lock',
        action='store_true',
        default=False,
        help='lock particular machines',
        )
    group.add_argument(
        '--unlock',
        action='store_true',
        default=False,
        help='unlock particular machines',
        )
    group.add_argument(
        '--lock-many',
        dest='num_to_lock',
        type=_positive_int,
        help='lock this many machines',
        )
    group.add_argument(
        '--update',
        action='store_true',
        default=False,
        help='update the description or status of some machines',
        )
    group.add_argument(
        '--summary',
        action='store_true',
        default=False,
        help='summarize locked-machine counts by owner',
        )
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        default=False,
        help='list all machines, not just those owned by you',
        )
    parser.add_argument(
        '--owner',
        default=None,
        help='owner of the lock(s) (must match to unlock a machine)',
        )
    parser.add_argument(
        '-f',
        action='store_true',
        default=False,
        help='don\'t exit after the first error, continue locking or unlocking other machines',
        )
    parser.add_argument(
        '--desc',
        default=None,
        help='lock description',
        )
    parser.add_argument(
        '--desc-pattern',
        default=None,
        help='lock description',
        )
    parser.add_argument(
        '--machine-type',
        default='plana',
        help='Type of machine to lock',
        )
    parser.add_argument(
        '--status',
        default=None,
        choices=['up', 'down'],
        help='whether a machine is usable for testing',
        )
    parser.add_argument(
        '--locked',
        default=None,
        choices=['true', 'false'],
        help='whether a machine is locked',
        )
    parser.add_argument(
        '--brief',
        action='store_true',
        default=False,
        help='Shorten information reported from --list',
        )
    parser.add_argument(
        '-t', '--targets',
        dest='targets',
        default=None,
        help='input yaml containing targets',
        )
    parser.add_argument(
        'machines',
        metavar='MACHINE',
        default=[],
        nargs='*',
        help='machines to operate on',
        )
    parser.add_argument(
        '--vm-type',
        default='ubuntu',
        help='virtual machine type',
        )

    ctx = parser.parse_args()

    loglevel = logging.ERROR
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        )

    teuthology.read_config(ctx)

    ret = 0
    user = ctx.owner
    machines = [canonicalize_hostname(m) for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                for new in yaml.safe_load_all(f):
                    # An empty YAML document loads as None; skip it
                    # instead of failing on the membership test.
                    if new and 'targets' in new:
                        # Names from the targets file are used as written.
                        machines.extend(new['targets'])
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update, \
            'machines cannot be specified with that operation'
    else:
        assert ctx.num_to_lock or ctx.list or ctx.list_targets or ctx.summary,\
            'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets, \
            '--all can only be used with --list and --list-targets'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'

    if ctx.brief:
        assert ctx.list, '--brief only applies to --list'

    if ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list'

        if machines:
            statuses = [ls.get_status(ctx, machine) for machine in machines]
        else:
            statuses = list_locks(ctx)

        # list_locks() returns None when the request fails; iterate over an
        # empty list in that case so that the failure is reported by the
        # 'error retrieving lock statuses' branch below instead of raising
        # TypeError here.
        vmachines = [
            vmachine['name'] for vmachine in statuses or []
            if vmachine['vpshost'] and vmachine['locked']
        ]
        if vmachines:
            # Avoid ssh-keyscans for everybody when listing all machines
            # Listing specific machines will update the keys.
            if machines:
                scan_for_locks(ctx, vmachines)
                statuses = [ls.get_status(ctx, machine)
                            for machine in machines]
            else:
                statuses = list_locks(ctx)
        if statuses:
            # With no explicit selection, show only the caller's machines.
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = teuthology.get_user()
            if ctx.owner is not None:
                statuses = [status for status in statuses
                            if status['locked_by'] == ctx.owner]
            if ctx.status is not None:
                statuses = [status for status in statuses
                            if status['up'] == (ctx.status == 'up')]
            if ctx.locked is not None:
                statuses = [status for status in statuses
                            if status['locked'] == (ctx.locked == 'true')]
            if ctx.desc is not None:
                statuses = [status for status in statuses
                            if status['description'] == ctx.desc]
            if ctx.desc_pattern is not None:
                statuses = [status for status in statuses
                            if status['description'] is not None and
                            status['description'].find(ctx.desc_pattern) >= 0]
            if ctx.list:
                if ctx.brief:
                    for s in statuses:
                        locked = "un" if s['locked'] == 0 else " "
                        mo = re.match(r'\w+@(\w+?)\..*', s['name'])
                        host = mo.group(1) if mo else s['name']
                        print('{host} {locked}locked {owner} "{desc}"'.format(
                            locked=locked, host=host,
                            owner=s['locked_by'], desc=s['description']))
                else:
                    print(json.dumps(statuses, indent=4))
            else:
                frag = {'targets': {}}
                for entry in statuses:
                    frag['targets'][entry['name']] = entry['sshpubkey']
                print(yaml.safe_dump(frag, default_flow_style=False))
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        for machine in machines:
            if not lock(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
                create_if_vm(ctx, machine)
    elif ctx.unlock:
        for machine in machines:
            if not unlock(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
                destroy_if_vm(ctx, machine)
    elif ctx.num_to_lock:
        result = lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user)
        if not result:
            ret = 1
        else:
            machines_to_update = result.keys()
            if ctx.machine_type == 'vps':
                print("Locks successful")
                print("Unable to display keys at this time (virtual machines are rebooting).")
                print("Please run teuthology-lock --list-targets once these machines come up.")
            else:
                print(yaml.safe_dump(dict(targets=result),
                                     default_flow_style=False))
    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

    # Apply --desc / --status to every machine that was just locked,
    # unlocked or selected for --update.
    if ctx.desc is not None or ctx.status is not None:
        for machine in machines_to_update:
            update_lock(ctx, machine, ctx.desc, ctx.status)

    return ret
|
|
|
|
def update_hostkeys():
    """
    Command-line entry point: refresh the stored ssh host keys.

    Machines are taken from the positional arguments (canonicalized) and/or
    from the ``targets`` section of the YAML file given with --targets;
    -a/--all is passed through on ``ctx`` for ``keyscan_check`` to act on.

    :returns: the result of ``scan_for_locks``.
    :raises argparse.ArgumentTypeError: if the targets file cannot be read.
    :raises AssertionError: if no machines are selected, or if --all is
        combined with explicit machines.
    """
    parser = argparse.ArgumentParser(description="""
Update any hostkeys that have changed. You can list specific machines
to run on, or use -a to check all of them automatically.
""")
    parser.add_argument(
        '-t', '--targets',
        default=None,
        help='input yaml containing targets to check',
        )
    parser.add_argument(
        'machines',
        metavar='MACHINES',
        default=[],
        nargs='*',
        help='hosts to check for updated keys',
        )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='be more verbose',
        )
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        default=False,
        help='update hostkeys of all machines in the db',
        )

    ctx = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if ctx.verbose else logging.ERROR,
        )

    teuthology.read_config(ctx)

    assert ctx.all or ctx.targets or ctx.machines, 'You must specify machines to update'
    if ctx.all:
        assert not (ctx.targets or ctx.machines), \
            'You can\'t specify machines with the --all option'
    machines = [canonicalize_hostname(m) for m in ctx.machines]

    if ctx.targets:
        try:
            with open(ctx.targets) as stream:
                for document in yaml.safe_load_all(stream):
                    if 'targets' in document:
                        # Names from the targets file are used as written.
                        for target in document['targets']:
                            machines.append(target)
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    return scan_for_locks(ctx, machines)
|
|
|
|
def keyscan_check(ctx, machines):
    """
    Run ``ssh-keyscan`` against ``machines``.

    The caller's list is left untouched: host names are computed into a new
    list.  Only the part after the last ``@`` of each name is passed to
    ssh-keyscan, so both ``user@host`` and bare ``host`` are accepted.

    :param ctx: context object; if it has a true ``all`` attribute, every
                machine known to the lock server is scanned instead of
                ``machines``.
    :param machines: iterable of machine names.
    :returns: ``(out, current_locks)`` where ``out`` is ssh-keyscan's
              stdout and ``current_locks`` maps each lock record's name to
              the record.
    """
    locks = list_locks(ctx)
    if locks is None:
        # list_locks() returns None when the server request fails.
        log.error('error retrieving lock statuses')
        locks = []
    current_locks = dict((entry['name'], entry) for entry in locks)

    if getattr(ctx, 'all', False):
        machines = current_locks.keys()

    # Build a fresh list rather than rewriting the argument in place, and
    # split only on the last '@' so odd names cannot break the unpacking.
    hosts = [machine.rsplit('@', 1)[-1] for machine in machines]

    p = subprocess.Popen(
        args=['ssh-keyscan'] + hosts,
        stdout=subprocess.PIPE,
        )
    out, _ = p.communicate()
    # The exit status is deliberately not checked; callers look at whether
    # any output came back.
    return (out, current_locks)
|
|
|
|
def update_keys(ctx, out, current_locks):
    """
    Push changed ssh host keys to the lock server.

    :param ctx: context object handed to ``update_lock``.
    :param out: text with one ``<hostname> <public key>`` entry per line.
    :param current_locks: mapping of ``ubuntu@<hostname>`` to lock records
                          carrying an ``sshpubkey`` entry.
    :returns: 0 if every needed update succeeded, 1 otherwise.
    :raises AssertionError: if a scanned host has no lock record.
    """
    ret = 0
    for line in out.splitlines():
        hostname, pubkey = line.split(' ', 1)
        # TODO: separate out user
        full_name = 'ubuntu@{host}'.format(host=hostname)
        log.info('Checking %s', full_name)
        assert full_name in current_locks, 'host is not in the database!'
        if current_locks[full_name]['sshpubkey'] == pubkey:
            continue
        log.info('New key found. Updating...')
        if update_lock(ctx, full_name, sshpubkey=pubkey):
            continue
        log.error('failed to update %s!', full_name)
        ret = 1
    return ret
|
|
|
|
def scan_for_locks(ctx, machines):
    """
    Key-scan ``machines`` and store any changed host keys.

    :returns: the result of ``update_keys`` (0 on success, 1 on failure).
    """
    scan_output, known_locks = keyscan_check(ctx, machines)
    return update_keys(ctx, scan_output, known_locks)
|
|
|
|
def do_summary(ctx):
    """
    Print a table of machine counts grouped by (owner, machine type).

    Only machines whose type equals ``ctx.machine_type`` are counted,
    unless that is ``'all'``.  Unlocked machines are grouped under the
    owner ``(free)``.  Rows are ordered by machine type, then by count,
    and a totals line is printed last.
    """
    wanted_type = ctx.machine_type
    # (owner, type) -> [machine count, up count, type]
    tallies = collections.defaultdict(lambda: [0, 0, 'unknown'])
    for entry in list_locks(ctx):
        if wanted_type != 'all' and entry['type'] != wanted_type:
            continue
        owner = entry['locked_by'] if entry['locked'] == 1 else '(free)'
        tally = tallies[(owner, entry['type'])]
        tally[0] += 1
        tally[1] += entry['up']  # up is 1 or 0
        tally[2] = entry['type']

    rows = sorted(tallies.items(),
                  key=lambda item: (item[1][2], item[1][0]))
    total_count = 0
    total_up = 0
    print("TYPE COUNT UP OWNER")

    for (who, (count, upcount, machinetype)) in rows:
        print("{machinetype:8s} {count:3d} {up:3d} {owner}".format(
            count=count, up=upcount, owner=who[0],
            machinetype=machinetype))
        total_count += count
        total_up += upcount

    print(" --- ---")
    print("{cnt:12d} {up:3d}".format(cnt=total_count, up=total_up))
|
|
|
|
def decanonicalize_hostname(s):
    """
    Strip the ``ubuntu@`` prefix and ``.front.sepia.ceph.com`` suffix.

    The name is shortened only when it both starts with the prefix and ends
    with the suffix; any other string is returned unchanged.  Requiring the
    suffix to be at the very end keeps the fixed-length slice from cutting
    into names that merely contain the domain somewhere in the middle.

    :param s: machine name, canonical or not.
    :returns: the short host name, or ``s`` itself if it is not canonical.
    """
    prefix = 'ubuntu@'
    suffix = '.front.sepia.ceph.com'
    if s.startswith(prefix) and s.endswith(suffix):
        return s[len(prefix):-len(suffix)]
    return s
|
|
|
|
def _get_downburst_exec():
|
|
"""
|
|
First check for downburst in the user's path.
|
|
Then check in ~/src, ~ubuntu/src, and ~teuthology/src.
|
|
Return '' if no executable downburst is found.
|
|
"""
|
|
path = os.environ.get('PATH', None)
|
|
if path:
|
|
for p in os.environ.get('PATH','').split(os.pathsep):
|
|
pth = os.path.join(p, 'downburst')
|
|
if os.access(pth, os.X_OK):
|
|
return pth
|
|
import pwd
|
|
little_old_me = pwd.getpwuid(os.getuid()).pw_name
|
|
for user in [little_old_me, 'ubuntu', 'teuthology']:
|
|
pth = "/home/%s/src/downburst/virtualenv/bin/downburst" % user
|
|
if os.access(pth, os.X_OK):
|
|
return pth
|
|
return ''
|
|
|
|
#
# Use downburst to create a virtual machine
#
def create_if_vm(ctx, machine_name):
    """
    Create ``machine_name`` with downburst if it is a virtual machine.

    A machine counts as virtual when its lock-server status has a
    ``vpshost`` value; that value is passed to downburst with ``-c``.
    The downburst settings are written as YAML to a temporary file and
    handed over via ``--meta-data``; values come from
    ``ctx.config['downburst']`` when present, else from the defaults below.

    :param ctx: context object; ``vm_type`` (default ``'ubuntu'``) and
                ``config`` are read from it.
    :param machine_name: machine name, canonical or short.
    :returns: False if the machine is not a VM or no downburst executable
              was found; True once downburst has been run.
    """
    status_info = ls.get_status(ctx, machine_name)
    phys_host = status_info['vpshost']
    if not phys_host:
        return False
    try:
        vm_type = ctx.vm_type
    except AttributeError:
        vm_type = 'ubuntu'
    createMe = decanonicalize_hostname(machine_name)
    # The temporary file must stay open until downburst has finished
    # reading it, so the whole run happens inside the with-block.
    with tempfile.NamedTemporaryFile() as tmp:
        try:
            lcnfg = ctx.config['downburst']
        except KeyError:
            lcnfg = {}

        file_info = {}
        file_info['disk-size'] = lcnfg.get('disk-size', '30G')
        file_info['ram'] = lcnfg.get('ram', '1.9G')
        file_info['cpus'] = lcnfg.get('cpus', 1)
        # Default to the MAC address recorded in the lock database.
        # (The original call was missing its closing parenthesis.)
        file_info['networks'] = lcnfg.get(
            'networks',
            [{'source': 'front', 'mac': status_info['mac']}])
        file_info['distro'] = lcnfg.get('distro', vm_type.lower())
        file_info['additional-disks'] = lcnfg.get(
            'additional-disks', 3)
        file_info['additional-disks-size'] = lcnfg.get(
            'additional-disks-size', '200G')
        file_info['arch'] = lcnfg.get('arch', 'x86_64')
        file_out = {'downburst': file_info}
        yaml.safe_dump(file_out, tmp)
        metadata = "--meta-data=%s" % tmp.name
        dbrst = _get_downburst_exec()
        if not dbrst:
            log.info("Error: no downburst executable found")
            return False
        p = subprocess.Popen(
            [dbrst, '-c', phys_host, 'create', metadata, createMe],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        owt, err = p.communicate()
        # stderr output is logged but not treated as a failure here.
        if err:
            log.info("Downburst completed on %s: %s" %
                     (machine_name, err))
        else:
            log.info("%s created: %s" % (machine_name, owt))
    return True
|
|
#
# Use downburst to destroy a virtual machine
#
def destroy_if_vm(ctx, machine_name):
    """
    Destroy ``machine_name`` with downburst if it is a virtual machine.

    :param ctx: context object handed to ``ls.get_status``.
    :param machine_name: machine name, canonical or short.
    :returns: True if the machine has no ``vpshost`` (nothing to do) or
              downburst wrote nothing to stderr; False if no downburst
              executable was found or downburst wrote to stderr.
    """
    phys_host = ls.get_status(ctx, machine_name)['vpshost']
    if not phys_host:
        return True
    short_name = decanonicalize_hostname(machine_name)
    downburst = _get_downburst_exec()
    if not downburst:
        log.info("Error: no downburst executable found")
        return False
    command = [downburst, '-c', phys_host, 'destroy', short_name]
    proc = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    owt, err = proc.communicate()
    # Any stderr output is treated as a failed destroy.
    if err:
        log.info("Error occurred while deleting %s" % short_name)
        return False
    log.info("%s destroyed: %s" % (machine_name, owt))
    return True
|
|
|