diff --git a/bootstrap b/bootstrap index cf029b6c3cb..ebecbad7b68 100755 --- a/bootstrap +++ b/bootstrap @@ -1,7 +1,7 @@ #!/bin/sh set -e -for package in python-dev python-pip python-virtualenv libevent-dev; do +for package in python-dev python-pip python-virtualenv libevent-dev python-libvirt; do if [ "$(dpkg --status -- $package|sed -n 's/^Status: //p')" != "install ok installed" ]; then # add a space after old values missing="${missing:+$missing }$package" @@ -13,7 +13,9 @@ if [ -n "$missing" ]; then exit 1 fi -virtualenv --no-site-packages --distribute virtualenv +# site packages needed because libvirt python bindings are not nicely +# packaged +virtualenv --system-site-packages --distribute virtualenv # avoid pip bugs ./virtualenv/bin/pip install --upgrade pip diff --git a/teuthology/lock.py b/teuthology/lock.py index 66f73c1edc1..a716bfcbd84 100644 --- a/teuthology/lock.py +++ b/teuthology/lock.py @@ -8,6 +8,7 @@ import re import collections import tempfile import os +import time from teuthology import lockstatus as ls from teuthology import misc as teuthology @@ -65,6 +66,13 @@ def list_locks(ctx): return None def update_lock(ctx, name, description=None, status=None, sshpubkey=None): + status_info = ls.get_status(ctx, name) + phys_host = status_info['vpshost'] + if phys_host: + keyscan_out = '' + while not keyscan_out: + time.sleep(10) + keyscan_out, _ = keyscan_check(ctx, [name]) updated = {} if description is not None: updated['desc'] = description @@ -540,7 +548,7 @@ def create_if_vm(ctx, machine_name): if not file_out: file_info = {} file_info['disk-size'] = lcnfg.get('disk-size', '30G') - file_info['ram'] = lcnfg.get('ram', '4G') + file_info['ram'] = lcnfg.get('ram', '1.9G') file_info['cpus'] = lcnfg.get('cpus', 1) file_info['networks'] = lcnfg.get('networks', [{'source' : 'front'}]) @@ -562,7 +570,7 @@ def create_if_vm(ctx, machine_name): stdout=subprocess.PIPE,stderr=subprocess.PIPE,) owt,err = p.communicate() if err: - log.info("Downburst command to 
create %s failed: %s" % + log.info("Downburst completed on %s: %s" % (machine_name,err)) else: log.info("%s created: %s" % (machine_name,owt)) diff --git a/teuthology/lockstatus.py b/teuthology/lockstatus.py index 9e63614a6e6..5c25479efc7 100644 --- a/teuthology/lockstatus.py +++ b/teuthology/lockstatus.py @@ -5,8 +5,11 @@ import logging log = logging.getLogger(__name__) def _lock_url(ctx): - return ctx.teuthology_config['lock_server'] - + try: + return ctx.teuthology_config['lock_server'] + except (AttributeError, KeyError): + return "http://teuthology.front.sepia.ceph.com/locker/lock" + def send_request(method, url, body=None, headers=None): http = httplib2.Http() resp, content = http.request(url, method=method, body=body, headers=headers) @@ -21,5 +24,3 @@ def get_status(ctx, name): if success: return json.loads(content) return None - - diff --git a/teuthology/misc.py b/teuthology/misc.py index ce3e96d0717..1912c15151b 100644 --- a/teuthology/misc.py +++ b/teuthology/misc.py @@ -727,10 +727,14 @@ def reconnect(ctx, timeout, remotes=None): for remote in need_reconnect: try: log.info('trying to connect to %s', remote.name) + key = ctx.config['targets'][remote.name] + kstat = lockstatus.get_status(ctx,remote.name) + if 'sshpubkey' in kstat: + key = kstat['sshpubkey'] from .orchestra import connection remote.ssh = connection.connect( user_at_host=remote.name, - host_key=ctx.config['targets'][remote.name], + host_key=key, keep_alive=True, ) except Exception: diff --git a/teuthology/nuke.py b/teuthology/nuke.py index 8841edd8388..5a8efae28c5 100644 --- a/teuthology/nuke.py +++ b/teuthology/nuke.py @@ -416,7 +416,7 @@ def nuke_helper(ctx, log): log.debug('shortname: %s' % shortname) log.debug('{ctx}'.format(ctx=ctx)) if not ctx.noipmi and 'ipmi_user' in ctx.teuthology_config: - console = remote.RemoteConsole(name=host, + console = remote.getRemoteConsole(name=host, ipmiuser=ctx.teuthology_config['ipmi_user'], ipmipass=ctx.teuthology_config['ipmi_password'], 
ipmidomain=ctx.teuthology_config['ipmi_domain']) diff --git a/teuthology/orchestra/remote.py b/teuthology/orchestra/remote.py index f6aa9672e11..47824d5ee34 100644 --- a/teuthology/orchestra/remote.py +++ b/teuthology/orchestra/remote.py @@ -46,22 +46,26 @@ class Remote(object): import pexpect import re import logging +import libvirt +from teuthology import lockstatus as ls log = logging.getLogger(__name__) -class RemoteConsole(object): +def getShortName(name): + hn = name.split('@')[-1] + p = re.compile('([^.]+)\.?.*') + return p.match(hn).groups()[0] + +class PhysicalConsole(): def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None, timeout=20): self.name = name + self.shortname = getShortName(name) + self.timeout = timeout + self.logfile = None self.ipmiuser = ipmiuser self.ipmipass = ipmipass self.ipmidomain = ipmidomain - self.timeout = timeout - self.logfile = None - - hn = self.name.split('@')[-1] - p = re.compile('([^.]+)\.?.*') - self.shortname = p.match(hn).groups()[0] def _exec(self, cmd): if not self.ipmiuser or not self.ipmipass or not self.ipmidomain: @@ -83,6 +87,34 @@ class RemoteConsole(object): child.logfile = self.logfile return child + def _exit_session(self, child, timeout=None): + child.send('~.') + t = timeout + if not t: + t = self.timeout + r = child.expect(['terminated ipmitool', pexpect.TIMEOUT, pexpect.EOF], timeout=t) + if r != 0: + self._exec('sol deactivate') + + def _wait_for_login(self, timeout=None, attempts=6): + log.debug('Waiting for login prompt on {s}'.format(s=self.shortname)) + # wait for login prompt to indicate boot completed + t = timeout + if not t: + t = self.timeout + for i in range(0, attempts): + start = time.time() + while time.time() - start < t: + child = self._exec('sol activate') + child.send('\n') + log.debug('expect: {s} login'.format(s=self.shortname)) + r = child.expect(['{s} login: '.format(s=self.shortname), pexpect.TIMEOUT, pexpect.EOF], timeout=(t - (time.time() - start))) + 
log.debug('expect before: {b}'.format(b=child.before)) + log.debug('expect after: {a}'.format(a=child.after)) + + self._exit_session(child) + if r == 0: + return def check_power(self, state, timeout=None): # check power total_timeout = timeout @@ -118,36 +150,6 @@ class RemoteConsole(object): log.info('Failed to get ipmi console status for {s}: {e}'.format(s=self.shortname, e=e)) return False - def _exit_session(self, child, timeout=None): - child.send('~.') - t = timeout - if not t: - t = self.timeout - r = child.expect(['terminated ipmitool', pexpect.TIMEOUT, pexpect.EOF], timeout=t) - if r != 0: - self._exec('sol deactivate') - - def _wait_for_login(self, timeout=None, attempts=6): - log.debug('Waiting for login prompt on {s}'.format(s=self.shortname)) - # wait for login prompt to indicate boot completed - t = timeout - if not t: - t = self.timeout - for i in range(0, attempts): - start = time.time() - while time.time() - start < t: - child = self._exec('sol activate') - child.send('\n') - log.debug('expect: {s} login'.format(s=self.shortname)) - r = child.expect(['{s} login: '.format(s=self.shortname), pexpect.TIMEOUT, pexpect.EOF], timeout=(t - (time.time() - start))) - log.debug('expect before: {b}'.format(b=child.before)) - log.debug('expect after: {a}'.format(a=child.after)) - - self._exit_session(child) - if r == 0: - return - raise pexpect.TIMEOUT ('Timeout exceeded ({t}) for {a} attempts in _wait_for_login()'.format(t=t, a=attempts)) - def power_cycle(self): log.info('Power cycling {s}'.format(s=self.shortname)) child = self._exec('power cycle') @@ -166,18 +168,6 @@ class RemoteConsole(object): self._wait_for_login() log.info('Hard reset for {s} completed'.format(s=self.shortname)) - def power_off(self): - log.info('Power off {s}'.format(s=self.shortname)) - start = time.time() - while time.time() - start < self.timeout: - child = self._exec('power off') - r = child.expect(['Chassis Power Control: Down/Off', pexpect.EOF], timeout=self.timeout) - if r == 
0: - break - if not self.check_power('off', 60): - log.error('Failed to power off {s}'.format(s=self.shortname)) - log.info('Power off for {s} completed'.format(s=self.shortname)) - def power_on(self): log.info('Power on {s}'.format(s=self.shortname)) start = time.time() @@ -190,6 +180,18 @@ class RemoteConsole(object): log.error('Failed to power on {s}'.format(s=self.shortname)) log.info('Power on for {s} completed'.format(s=self.shortname)) + def power_off(self): + log.info('Power off {s}'.format(s=self.shortname)) + start = time.time() + while time.time() - start < self.timeout: + child = self._exec('power off') + r = child.expect(['Chassis Power Control: Down/Off', pexpect.EOF], timeout=self.timeout) + if r == 0: + break + if not self.check_power('off', 60): + log.error('Failed to power off {s}'.format(s=self.shortname)) + log.info('Power off for {s} completed'.format(s=self.shortname)) + def power_off_for_interval(self, interval=30): log.info('Power off {s} for {i} seconds'.format(s=self.shortname, i=interval)) child = self._exec('power off') @@ -199,7 +201,54 @@ class RemoteConsole(object): child = self._exec('power on') child.expect('Chassis Power Control: Up/On', timeout=self.timeout) - self._wait_for_login() - log.info('Power off for {i} seconds completed'.format(s=self.shortname, i=interval)) + +class VirtualConsole(): + + def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None, timeout=20): + self.shortname = getShortName(name) + status_info = ls.get_status('', self.shortname) + try: + phys_host = status_info['vpshost'] + except TypeError: + return + self.connection = libvirt.open(phys_host) + for i in self.connection.listDomainsID(): + d = self.connection.lookupByID(i) + if d.name() == self.shortname: + self.vm_domain = d + break + return + + def check_power(self, state, timeout=None): + return self.vm_domain.info()[0] in [libvirt.VIR_DOMAIN_RUNNING, libvirt.VIR_DOMAIN_BLOCKED, + libvirt.VIR_DOMAIN_PAUSED] + + def check_status(self, timeout=None): +
return self.vm_domain.info()[0] == libvirt.VIR_DOMAIN_RUNNING + + def power_cycle(self): + self.vm_domain.destroy() + self.vm_domain.create() + + def hard_reset(self): + self.vm_domain.destroy() + + def power_on(self): + self.vm_domain.create() + + def power_off(self): + self.vm_domain.destroy() + + def power_off_for_interval(self, interval=30): + log.info('Power off {s} for {i} seconds'.format(s=self.shortname, i=interval)) + self.vm_domain.destroy() + time.sleep(interval) + self.vm_domain.create() + log.info('Power off for {i} seconds completed'.format(s=self.shortname, i=interval)) + +def getRemoteConsole(name, ipmiuser, ipmipass, ipmidomain, logfile=None, timeout=20): + if name.startswith('vpm'): + return VirtualConsole(name, ipmiuser, ipmipass, ipmidomain, logfile, timeout) + return PhysicalConsole(name, ipmiuser, ipmipass, ipmidomain, logfile, timeout) diff --git a/teuthology/task/internal.py b/teuthology/task/internal.py index 53249685e1a..444341eb4e5 100644 --- a/teuthology/task/internal.py +++ b/teuthology/task/internal.py @@ -106,6 +106,7 @@ def lock_machines(ctx, config): while not keyscan_out: time.sleep(10) keyscan_out, current_locks = lock.keyscan_check(ctx, vmlist) + log.info('virtual machine is stil unavailable') if lock.update_keys(ctx, keyscan_out, current_locks): log.info("Error in virtual machine keys") newscandict = {} @@ -172,8 +173,18 @@ def connect(ctx, config): from ..orchestra import connection, remote from ..orchestra import cluster remotes = [] - for t, key in ctx.config['targets'].iteritems(): + machs = [] + for name in ctx.config['targets'].iterkeys(): + machs.append(name) + lock.scan_for_locks(ctx, machs) + for t, xkey in ctx.config['targets'].iteritems(): log.debug('connecting to %s', t) + log.info('Key is :%s:', xkey) + oldkeystatus = lockstatus.get_status(ctx, t) + key = xkey + if 'sshpubkey' in oldkeystatus: + log.info('possible key is :%s:',oldkeystatus['sshpubkey']) + key =
oldkeystatus['sshpubkey'] remotes.append( remote.Remote(name=t, ssh=connection.connect(user_at_host=t, @@ -492,16 +503,16 @@ def vm_setup(ctx, config): Look for virtual machines and handle their initialization """ with parallel() as p: - editinfo = './teuthology/task/edit_sudoers.sh' + editinfo = os.path.join(os.path.dirname(__file__),'edit_sudoers.sh') for remote in ctx.cluster.remotes.iterkeys(): mname = re.match(".*@([^\.]*)\..*", str(remote)).group(1) - if mname[0:3] == 'vpm': + if mname.startswith('vpm'): r = remote.run(args=['test', '-e', '/ceph-qa-ready',], stdout=StringIO(), check_status=False,) if r.exitstatus != 0: p1 = subprocess.Popen(['cat', editinfo], stdout=subprocess.PIPE) - p2 = subprocess.Popen(['ssh','-t','-t',str(remote)],stdin=p1.stdout, stdout=subprocess.PIPE) + p2 = subprocess.Popen(['ssh','-t','-t',str(remote), 'sudo', 'sh'],stdin=p1.stdout, stdout=subprocess.PIPE) _,err = p2.communicate() if err: log.info("Edit of /etc/sudoers failed: %s",err) diff --git a/teuthology/task/thrashosds.py b/teuthology/task/thrashosds.py index ea997fdcfc0..2c788d47b03 100644 --- a/teuthology/task/thrashosds.py +++ b/teuthology/task/thrashosds.py @@ -105,7 +105,7 @@ def task(ctx, config): host = t.split('@')[-1] shortname = host.split('.')[0] from ..orchestra import remote as oremote - console = oremote.RemoteConsole( + console = oremote.getRemoteConsole( name=host, ipmiuser=ctx.teuthology_config['ipmi_user'], ipmipass=ctx.teuthology_config['ipmi_password'],