ceph/teuthology/task/kernel.py
Josh Durgin 96e89d30ec kernel: loop reconnecting in case we race with shutdown
Previously, if we reconnected before shutdown completed we asserted
that the kernel did not boot into the new version, when we just needed
to wait for the machine to reboot.
2012-01-12 13:02:22 -08:00

266 lines
8.4 KiB
Python

from cStringIO import StringIO
import logging
from teuthology import misc as teuthology
from ..orchestra import run
log = logging.getLogger(__name__)
def normalize_config(ctx, config):
"""
Returns a config whose keys are all real roles.
Generic roles (client, mon, osd, etc.) are replaced with
the actual roles (client.0, client.1, etc.). If the config
specifies a different version for a specific role, this is
unchanged.
For example, with 3 OSDs this::
osd:
tag: v3.0
osd.1:
branch: new_btrfs
is transformed into::
osd.0:
tag: v3.0
osd.1:
branch: new_btrfs
osd.2:
tag: v3.0
If config is None or just specifies a version to use,
it is applied to all nodes.
"""
if config is None or \
len(config) == 1 and config.keys() in [['tag'], ['branch'], ['sha1']]:
new_config = {}
if config is None:
config = {'branch': 'master'}
for _, roles_for_host in ctx.cluster.remotes.iteritems():
new_config[roles_for_host[0]] = config
return new_config
new_config = {}
for role, role_config in config.iteritems():
if role_config is None:
role_config = {'branch': 'master'}
if '.' in role:
new_config[role] = role_config
else:
for id_ in teuthology.all_roles_of_type(ctx.cluster, role):
name = '{type}.{id}'.format(type=role, id=id_)
# specific overrides generic
if name not in config:
new_config[name] = role_config
return new_config
def validate_config(ctx, config):
for _, roles_for_host in ctx.cluster.remotes.iteritems():
kernel = None
for role in roles_for_host:
role_kernel = config.get(role, kernel)
if kernel is None:
kernel = role_kernel
elif role_kernel is not None:
assert kernel == role_kernel, \
"everything on the same host must use the same kernel"
if role in config:
del config[role]
def need_to_install(ctx, role, sha1):
ret = True
log.info('Checking kernel version of {role}...'.format(role=role))
version_fp = StringIO()
ctx.cluster.only(role).run(
args=[
'uname',
'-r',
],
stdout=version_fp,
)
version = version_fp.getvalue().rstrip('\n')
log.debug('current kernel version is: {version}'.format(version=version))
if '-g' in version:
_, current_sha1 = version.rsplit('-g', 1)
if sha1.startswith(current_sha1):
log.debug('current sha1 is the same, do not need to install')
ret = False
version_fp.close()
return ret
def install_and_reboot(ctx, config):
procs = {}
for role, sha1 in config.iteritems():
log.info('Installing kernel version {sha1} on {role}...'.format(sha1=sha1,
role=role))
(role_remote,) = ctx.cluster.only(role).remotes.keys()
_, deb_url = teuthology.get_ceph_binary_url(sha1=sha1, flavor='kernel')
log.info('fetching kernel from {url}'.format(url=deb_url))
proc = role_remote.run(
args=[
'echo',
'linux-image.deb',
run.Raw('|'),
'wget',
'-nv',
'-O',
'/tmp/linux-image.deb',
'--base={url}'.format(url=deb_url),
'--input-file=-',
run.Raw('&&'),
'sudo',
'dpkg',
'-i',
'/tmp/linux-image.deb',
run.Raw('&&'),
# and now extract the actual boot image name from the deb
'dpkg-deb',
'--fsys-tarfile',
'/tmp/linux-image.deb',
run.Raw('|'),
'tar',
'-t',
'-v',
'-f', '-',
'--wildcards',
'--',
'./boot/vmlinuz-*',
run.Raw('|'),
# we can only rely on mawk being installed, so just call it explicitly
'mawk',
# and use the image name to construct the content of
# the grub menu entry, so we can default to it;
# hardcoded to assume Ubuntu, English, etc.
r'{sub("^\\./boot/vmlinuz-", "", $6); print "cat <<EOF\n" "set default=\"Ubuntu, with Linux " $6 "\"\n" "EOF"}',
# make it look like an emacs backup file so
# unfortunately timed update-grub runs don't pick it
# up yet; use sudo tee so we are able to write to /etc
run.Raw('|'),
'sudo',
'tee',
'--',
'/etc/grub.d/01_ceph_kernel.tmp~',
run.Raw('>/dev/null'),
run.Raw('&&'),
'sudo',
'chmod',
'a+x',
'--',
'/etc/grub.d/01_ceph_kernel.tmp~',
run.Raw('&&'),
'sudo',
'mv',
'--',
'/etc/grub.d/01_ceph_kernel.tmp~',
'/etc/grub.d/01_ceph_kernel',
run.Raw('&&'),
'sudo',
'update-grub',
run.Raw('&&'),
'rm',
'/tmp/linux-image.deb',
run.Raw('&&'),
'sudo',
'shutdown',
'-r',
'now',
],
wait=False,
)
procs[role_remote.name] = proc
for name, proc in procs.iteritems():
log.debug('Waiting for install on %s to complete...', name)
proc.exitstatus.get()
def wait_for_reboot(ctx, need_install, timeout):
"""
Loop reconnecting and checking kernel versions until
they're all correct or the timeout is exceeded.
"""
import time
starttime = time.time()
while need_install:
teuthology.reconnect(ctx, timeout)
for client in need_install.keys():
log.info('Checking client {client} for new kernel version...'.format(client=client))
if need_to_install(ctx, client, need_install[client]):
assert time.time() - starttime < timeout, \
'failed to install new kernel version within timeout'
else:
del need_install[client]
time.sleep(1)
def task(ctx, config):
"""
Make sure the specified kernel is installed.
This can be a branch, tag, or sha1 of ceph-client.git.
To install the kernel from the master branch on all hosts::
kernel:
tasks:
- ceph:
To wait 5 minutes for hosts to reboot::
kernel:
timeout: 300
tasks:
- ceph:
To specify different kernels for each client::
kernel:
client.0:
branch: foo
client.1:
tag: v3.0rc1
client.2:
sha1: db3540522e955c1ebb391f4f5324dff4f20ecd09
tasks:
- ceph:
You can specify a branch, tag, or sha1 for all roles
of a certain type (more specific roles override this)::
kernel:
client:
tag: v3.0
osd:
branch: btrfs_fixes
client.1:
branch: more_specific_branch
osd.3:
branch: master
"""
assert config is None or isinstance(config, dict), \
"task kernel only supports a dictionary for configuration"
timeout = 300
if config is not None and 'timeout' in config:
timeout = config.pop('timeout')
config = normalize_config(ctx, config)
validate_config(ctx, config)
need_install = {}
for role, role_config in config.iteritems():
sha1, _ = teuthology.get_ceph_binary_url(
branch=role_config.get('branch'),
tag=role_config.get('tag'),
sha1=role_config.get('sha1'),
flavor='kernel',
)
log.debug('sha1 for {role} is {sha1}'.format(role=role, sha1=sha1))
ctx.summary['{role}-kernel-sha1'.format(role=role)] = sha1
if need_to_install(ctx, role, sha1):
need_install[role] = sha1
if need_install:
install_and_reboot(ctx, need_install)
wait_for_reboot(ctx, need_install, timeout)