mirror of
https://github.com/ceph/ceph
synced 2025-02-09 03:49:38 +00:00
Support added for running scheduled tasks on virtual machines.
This included: A.) Changes made so that full path names were used for some files (scheduled tasks start in different home directories). B.) Changes to ensure tasks come up on the beanstalkc queue properly. C.) Finding and inserting the libvirt equivalent code for virtual machines in order to simulate ipmi actions. D.) Fix host key code, report valgrind issue more clearly. E.) Some message and downburst call changes. Fix #4988 Fix #5122 Signed-off-by: Warren Usui <warren.usui@inktank.com>
This commit is contained in:
parent
c95698045d
commit
a4994e3bde
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
for package in python-dev python-pip python-virtualenv libevent-dev; do
|
||||
for package in python-dev python-pip python-virtualenv libevent-dev python-libvirt; do
|
||||
if [ "$(dpkg --status -- $package|sed -n 's/^Status: //p')" != "install ok installed" ]; then
|
||||
# add a space after old values
|
||||
missing="${missing:+$missing }$package"
|
||||
@ -13,7 +13,9 @@ if [ -n "$missing" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
virtualenv --no-site-packages --distribute virtualenv
|
||||
# site packages needed because libvirt python bindings are not nicely
|
||||
# packaged
|
||||
virtualenv --system-site-packages --distribute virtualenv
|
||||
|
||||
# avoid pip bugs
|
||||
./virtualenv/bin/pip install --upgrade pip
|
||||
|
@ -8,6 +8,7 @@ import re
|
||||
import collections
|
||||
import tempfile
|
||||
import os
|
||||
import time
|
||||
|
||||
from teuthology import lockstatus as ls
|
||||
from teuthology import misc as teuthology
|
||||
@ -65,6 +66,13 @@ def list_locks(ctx):
|
||||
return None
|
||||
|
||||
def update_lock(ctx, name, description=None, status=None, sshpubkey=None):
|
||||
status_info = ls.get_status(ctx, name)
|
||||
phys_host = status_info['vpshost']
|
||||
if phys_host:
|
||||
keyscan_out = ''
|
||||
while not keyscan_out:
|
||||
time.sleep(10)
|
||||
keyscan_out, _ = keyscan_check(ctx, [name])
|
||||
updated = {}
|
||||
if description is not None:
|
||||
updated['desc'] = description
|
||||
@ -540,7 +548,7 @@ def create_if_vm(ctx, machine_name):
|
||||
if not file_out:
|
||||
file_info = {}
|
||||
file_info['disk-size'] = lcnfg.get('disk-size', '30G')
|
||||
file_info['ram'] = lcnfg.get('ram', '4G')
|
||||
file_info['ram'] = lcnfg.get('ram', '1.9G')
|
||||
file_info['cpus'] = lcnfg.get('cpus', 1)
|
||||
file_info['networks'] = lcnfg.get('networks',
|
||||
[{'source' : 'front'}])
|
||||
@ -562,7 +570,7 @@ def create_if_vm(ctx, machine_name):
|
||||
stdout=subprocess.PIPE,stderr=subprocess.PIPE,)
|
||||
owt,err = p.communicate()
|
||||
if err:
|
||||
log.info("Downburst command to create %s failed: %s" %
|
||||
log.info("Downburst completed on %s: %s" %
|
||||
(machine_name,err))
|
||||
else:
|
||||
log.info("%s created: %s" % (machine_name,owt))
|
||||
|
@ -5,8 +5,11 @@ import logging
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def _lock_url(ctx):
|
||||
return ctx.teuthology_config['lock_server']
|
||||
|
||||
try:
|
||||
return ctx.teuthology_config['lock_server']
|
||||
except (AttributeError, KeyError):
|
||||
return "http://teuthology.front.sepia.ceph.com/locker/lock"
|
||||
|
||||
def send_request(method, url, body=None, headers=None):
|
||||
http = httplib2.Http()
|
||||
resp, content = http.request(url, method=method, body=body, headers=headers)
|
||||
@ -21,5 +24,3 @@ def get_status(ctx, name):
|
||||
if success:
|
||||
return json.loads(content)
|
||||
return None
|
||||
|
||||
|
||||
|
@ -727,10 +727,14 @@ def reconnect(ctx, timeout, remotes=None):
|
||||
for remote in need_reconnect:
|
||||
try:
|
||||
log.info('trying to connect to %s', remote.name)
|
||||
key = ctx.config['targets'][remote.name]
|
||||
kstat = lockstatus.get_status(ctx,remote.name)
|
||||
if 'sshpubkey' in kstat:
|
||||
key = kstat['sshpubkey']
|
||||
from .orchestra import connection
|
||||
remote.ssh = connection.connect(
|
||||
user_at_host=remote.name,
|
||||
host_key=ctx.config['targets'][remote.name],
|
||||
host_key=key,
|
||||
keep_alive=True,
|
||||
)
|
||||
except Exception:
|
||||
|
@ -416,7 +416,7 @@ def nuke_helper(ctx, log):
|
||||
log.debug('shortname: %s' % shortname)
|
||||
log.debug('{ctx}'.format(ctx=ctx))
|
||||
if not ctx.noipmi and 'ipmi_user' in ctx.teuthology_config:
|
||||
console = remote.RemoteConsole(name=host,
|
||||
console = remote.getRemoteConsole(name=host,
|
||||
ipmiuser=ctx.teuthology_config['ipmi_user'],
|
||||
ipmipass=ctx.teuthology_config['ipmi_password'],
|
||||
ipmidomain=ctx.teuthology_config['ipmi_domain'])
|
||||
|
@ -46,22 +46,26 @@ class Remote(object):
|
||||
import pexpect
|
||||
import re
|
||||
import logging
|
||||
import libvirt
|
||||
from teuthology import lockstatus as ls
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class RemoteConsole(object):
|
||||
def getShortName(name):
    """
    Return the short host name of a machine.

    An optional ``user@`` prefix is dropped and everything from the first
    dot onwards is discarded, so ``ubuntu@vpm001.front.sepia.ceph.com``
    becomes ``vpm001``.

    :param name: host name, optionally of the form ``user@host.domain``
    :returns: the first dot-separated component of the host part
    :raises AttributeError: if the host part is empty or starts with a dot
                            (the pattern does not match)
    """
    hn = name.split('@')[-1]
    # Raw string: '\.' inside a plain literal is an invalid escape sequence.
    p = re.compile(r'([^.]+)\.?.*')
    return p.match(hn).groups()[0]
|
||||
|
||||
class PhysicalConsole():
|
||||
|
||||
def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None, timeout=20):
|
||||
self.name = name
|
||||
self.shortname = getShortName(name)
|
||||
self.timeout = timeout
|
||||
self.logfile = None
|
||||
self.ipmiuser = ipmiuser
|
||||
self.ipmipass = ipmipass
|
||||
self.ipmidomain = ipmidomain
|
||||
self.timeout = timeout
|
||||
self.logfile = None
|
||||
|
||||
hn = self.name.split('@')[-1]
|
||||
p = re.compile('([^.]+)\.?.*')
|
||||
self.shortname = p.match(hn).groups()[0]
|
||||
|
||||
def _exec(self, cmd):
|
||||
if not self.ipmiuser or not self.ipmipass or not self.ipmidomain:
|
||||
@ -83,6 +87,34 @@ class RemoteConsole(object):
|
||||
child.logfile = self.logfile
|
||||
return child
|
||||
|
||||
def _exit_session(self, child, timeout=None):
|
||||
child.send('~.')
|
||||
t = timeout
|
||||
if not t:
|
||||
t = self.timeout
|
||||
r = child.expect(['terminated ipmitool', pexpect.TIMEOUT, pexpect.EOF], timeout=t)
|
||||
if r != 0:
|
||||
self._exec('sol deactivate')
|
||||
|
||||
def _wait_for_login(self, timeout=None, attempts=6):
|
||||
log.debug('Waiting for login prompt on {s}'.format(s=self.shortname))
|
||||
# wait for login prompt to indicate boot completed
|
||||
t = timeout
|
||||
if not t:
|
||||
t = self.timeout
|
||||
for i in range(0, attempts):
|
||||
start = time.time()
|
||||
while time.time() - start < t:
|
||||
child = self._exec('sol activate')
|
||||
child.send('\n')
|
||||
log.debug('expect: {s} login'.format(s=self.shortname))
|
||||
r = child.expect(['{s} login: '.format(s=self.shortname), pexpect.TIMEOUT, pexpect.EOF], timeout=(t - (time.time() - start)))
|
||||
log.debug('expect before: {b}'.format(b=child.before))
|
||||
log.debug('expect after: {a}'.format(a=child.after))
|
||||
|
||||
self._exit_session(child)
|
||||
if r == 0:
|
||||
return
|
||||
def check_power(self, state, timeout=None):
|
||||
# check power
|
||||
total_timeout = timeout
|
||||
@ -118,36 +150,6 @@ class RemoteConsole(object):
|
||||
log.info('Failed to get ipmi console status for {s}: {e}'.format(s=self.shortname, e=e))
|
||||
return False
|
||||
|
||||
def _exit_session(self, child, timeout=None):
|
||||
child.send('~.')
|
||||
t = timeout
|
||||
if not t:
|
||||
t = self.timeout
|
||||
r = child.expect(['terminated ipmitool', pexpect.TIMEOUT, pexpect.EOF], timeout=t)
|
||||
if r != 0:
|
||||
self._exec('sol deactivate')
|
||||
|
||||
def _wait_for_login(self, timeout=None, attempts=6):
|
||||
log.debug('Waiting for login prompt on {s}'.format(s=self.shortname))
|
||||
# wait for login prompt to indicate boot completed
|
||||
t = timeout
|
||||
if not t:
|
||||
t = self.timeout
|
||||
for i in range(0, attempts):
|
||||
start = time.time()
|
||||
while time.time() - start < t:
|
||||
child = self._exec('sol activate')
|
||||
child.send('\n')
|
||||
log.debug('expect: {s} login'.format(s=self.shortname))
|
||||
r = child.expect(['{s} login: '.format(s=self.shortname), pexpect.TIMEOUT, pexpect.EOF], timeout=(t - (time.time() - start)))
|
||||
log.debug('expect before: {b}'.format(b=child.before))
|
||||
log.debug('expect after: {a}'.format(a=child.after))
|
||||
|
||||
self._exit_session(child)
|
||||
if r == 0:
|
||||
return
|
||||
raise pexpect.TIMEOUT ('Timeout exceeded ({t}) for {a} attempts in _wait_for_login()'.format(t=t, a=attempts))
|
||||
|
||||
def power_cycle(self):
|
||||
log.info('Power cycling {s}'.format(s=self.shortname))
|
||||
child = self._exec('power cycle')
|
||||
@ -166,18 +168,6 @@ class RemoteConsole(object):
|
||||
self._wait_for_login()
|
||||
log.info('Hard reset for {s} completed'.format(s=self.shortname))
|
||||
|
||||
def power_off(self):
|
||||
log.info('Power off {s}'.format(s=self.shortname))
|
||||
start = time.time()
|
||||
while time.time() - start < self.timeout:
|
||||
child = self._exec('power off')
|
||||
r = child.expect(['Chassis Power Control: Down/Off', pexpect.EOF], timeout=self.timeout)
|
||||
if r == 0:
|
||||
break
|
||||
if not self.check_power('off', 60):
|
||||
log.error('Failed to power off {s}'.format(s=self.shortname))
|
||||
log.info('Power off for {s} completed'.format(s=self.shortname))
|
||||
|
||||
def power_on(self):
|
||||
log.info('Power on {s}'.format(s=self.shortname))
|
||||
start = time.time()
|
||||
@ -190,6 +180,18 @@ class RemoteConsole(object):
|
||||
log.error('Failed to power on {s}'.format(s=self.shortname))
|
||||
log.info('Power on for {s} completed'.format(s=self.shortname))
|
||||
|
||||
def power_off(self):
|
||||
log.info('Power off {s}'.format(s=self.shortname))
|
||||
start = time.time()
|
||||
while time.time() - start < self.timeout:
|
||||
child = self._exec('power off')
|
||||
r = child.expect(['Chassis Power Control: Down/Off', pexpect.EOF], timeout=self.timeout)
|
||||
if r == 0:
|
||||
break
|
||||
if not self.check_power('off', 60):
|
||||
log.error('Failed to power off {s}'.format(s=self.shortname))
|
||||
log.info('Power off for {s} completed'.format(s=self.shortname))
|
||||
|
||||
def power_off_for_interval(self, interval=30):
|
||||
log.info('Power off {s} for {i} seconds'.format(s=self.shortname, i=interval))
|
||||
child = self._exec('power off')
|
||||
@ -199,7 +201,54 @@ class RemoteConsole(object):
|
||||
|
||||
child = self._exec('power on')
|
||||
child.expect('Chassis Power Control: Up/On', timeout=self.timeout)
|
||||
|
||||
self._wait_for_login()
|
||||
|
||||
log.info('Power off for {i} seconds completed'.format(s=self.shortname, i=interval))
|
||||
|
||||
class VirtualConsole():
    """
    Console for virtual machines that uses libvirt in place of IPMI.

    Exposes the same method names as PhysicalConsole so callers can use
    either one; power operations are mapped onto libvirt domain
    ``create()`` / ``destroy()`` calls.
    """

    def __init__(self, name, ipmiuser, ipmipass, ipmidomain, logfile=None, timeout=20):
        """
        Look up the libvirt domain that backs ``name``.

        The ipmi*, logfile and timeout arguments are accepted only so the
        signature matches PhysicalConsole; they are not used here.

        :param name: machine name, optionally ``user@host.domain``
        """
        self.shortname = getShortName(name)
        # Always define these so later attribute access is well defined
        # even when the lookup below bails out early.
        self.connection = None
        self.vm_domain = None
        status_info = ls.get_status('', self.shortname)
        try:
            phys_host = status_info['vpshost']
        except TypeError:
            # status_info is not subscriptable (e.g. None): nothing to
            # connect to.
            return
        self.connection = libvirt.open(phys_host)
        for i in self.connection.listDomainsID():
            d = self.connection.lookupByID(i)
            if d.name() == self.shortname:
                self.vm_domain = d
                break

    def check_power(self, state, timeout=None):
        """
        Return True if the domain is running, blocked or paused.

        NOTE(review): ``state`` and ``timeout`` are ignored, as in the
        original code; confirm whether callers asking for 'off' expect
        the inverse result.
        """
        return self.vm_domain.info()[0] in [libvirt.VIR_DOMAIN_RUNNING,
                                            libvirt.VIR_DOMAIN_BLOCKED,
                                            libvirt.VIR_DOMAIN_PAUSED]

    def check_status(self, timeout=None):
        """
        Return True if the domain state is VIR_DOMAIN_RUNNING.
        """
        return self.vm_domain.info()[0] == libvirt.VIR_DOMAIN_RUNNING

    def power_cycle(self):
        """
        Destroy the domain and start it again.
        """
        self.vm_domain.destroy()
        self.vm_domain.create()

    def hard_reset(self):
        """
        Destroy the domain.

        NOTE(review): the domain is not started again here; confirm that
        this is the intended equivalent of a hard reset.
        """
        self.vm_domain.destroy()

    def power_on(self):
        """
        Start the domain.
        """
        self.vm_domain.create()

    def power_off(self):
        """
        Destroy the domain.
        """
        self.vm_domain.destroy()

    def power_off_for_interval(self, interval=30):
        """
        Destroy the domain, sleep, then start it again.

        :param interval: seconds to keep the domain off
        """
        log.info('Power off {s} for {i} seconds'.format(s=self.shortname, i=interval))
        self.vm_domain.destroy()
        time.sleep(interval)
        self.vm_domain.create()
        log.info('Power off for {i} seconds completed'.format(i=interval))
|
||||
|
||||
def getRemoteConsole(name, ipmiuser, ipmipass, ipmidomain, logfile=None, timeout=20):
    """
    Return the console object appropriate for a machine.

    Machines whose host name starts with ``vpm`` get a VirtualConsole;
    everything else gets a PhysicalConsole. All arguments are passed
    through unchanged to the chosen class.

    :param name: machine name, optionally of the form ``user@host.domain``
    :returns: a VirtualConsole or PhysicalConsole instance
    """
    # Test the host part only, so that 'user@vpm001...' is recognised as
    # a virtual machine just like a bare 'vpm001...'.
    if name.split('@')[-1].startswith('vpm'):
        return VirtualConsole(name, ipmiuser, ipmipass, ipmidomain, logfile, timeout)
    return PhysicalConsole(name, ipmiuser, ipmipass, ipmidomain, logfile, timeout)
|
||||
|
@ -106,6 +106,7 @@ def lock_machines(ctx, config):
|
||||
while not keyscan_out:
|
||||
time.sleep(10)
|
||||
keyscan_out, current_locks = lock.keyscan_check(ctx, vmlist)
|
||||
log.info('virtual machine is stil unavailable')
|
||||
if lock.update_keys(ctx, keyscan_out, current_locks):
|
||||
log.info("Error in virtual machine keys")
|
||||
newscandict = {}
|
||||
@ -172,8 +173,18 @@ def connect(ctx, config):
|
||||
from ..orchestra import connection, remote
|
||||
from ..orchestra import cluster
|
||||
remotes = []
|
||||
for t, key in ctx.config['targets'].iteritems():
|
||||
machs = []
|
||||
for name in ctx.config['targets'].iterkeys():
|
||||
machs.append(name)
|
||||
lock.scan_for_locks(ctx, machs)
|
||||
for t, xkey in ctx.config['targets'].iteritems():
|
||||
log.debug('connecting to %s', t)
|
||||
log.info('Key is :%s:', xkey)
|
||||
oldkeystatus = lockstatus.get_status(ctx, t)
|
||||
key = xkey
|
||||
if 'sshpubkey' in oldkeystatus:
|
||||
log.info('possible key is :%s:',oldkeystatus['sshpubkey'])
|
||||
key = oldkeystatus['sshpubkey']
|
||||
remotes.append(
|
||||
remote.Remote(name=t,
|
||||
ssh=connection.connect(user_at_host=t,
|
||||
@ -492,16 +503,16 @@ def vm_setup(ctx, config):
|
||||
Look for virtual machines and handle their initialization
|
||||
"""
|
||||
with parallel() as p:
|
||||
editinfo = './teuthology/task/edit_sudoers.sh'
|
||||
editinfo = os.path.join(os.path.dirname(__file__),'edit_sudoers.sh')
|
||||
for remote in ctx.cluster.remotes.iterkeys():
|
||||
mname = re.match(".*@([^\.]*)\..*", str(remote)).group(1)
|
||||
if mname[0:3] == 'vpm':
|
||||
if mname.startswith('vpm'):
|
||||
r = remote.run(args=['test', '-e', '/ceph-qa-ready',],
|
||||
stdout=StringIO(),
|
||||
check_status=False,)
|
||||
if r.exitstatus != 0:
|
||||
p1 = subprocess.Popen(['cat', editinfo], stdout=subprocess.PIPE)
|
||||
p2 = subprocess.Popen(['ssh','-t','-t',str(remote)],stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
p2 = subprocess.Popen(['ssh','-t','-t',str(remote), 'sudo', 'sh'],stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
_,err = p2.communicate()
|
||||
if err:
|
||||
log.info("Edit of /etc/sudoers failed: %s",err)
|
||||
|
@ -105,7 +105,7 @@ def task(ctx, config):
|
||||
host = t.split('@')[-1]
|
||||
shortname = host.split('.')[0]
|
||||
from ..orchestra import remote as oremote
|
||||
console = oremote.RemoteConsole(
|
||||
console = oremote.getRemoteConsole(
|
||||
name=host,
|
||||
ipmiuser=ctx.teuthology_config['ipmi_user'],
|
||||
ipmipass=ctx.teuthology_config['ipmi_password'],
|
||||
|
Loading…
Reference in New Issue
Block a user