mirror of
https://github.com/ceph/ceph
synced 2025-01-07 11:41:48 +00:00
52aec32a7d
We had been writing 01_ceph_kernel with the kernel title, and relying on the fact that grub.cfg would never have submenus in it (implemented by a hack to /etc/grub.d/10_linux which neutered its submenu creation). However, that hack was modifying a package file, and got in the way of later apt commands. Rather than doing it that way, this divines the title of the submenu and sets the default variable to "submenu>kernel", which works to select the desired kernel. It depends on there being only one level of submenu, and on the format of the menuentry and submenu commands, dictated by grub2. None of this is likely to work at all outside Ubuntu. Fixes: #4496 Signed-off-by: Dan Mick <dan.mick@inktank.com> Reviewed-by: Dan Mick <dan.mick@inktank.com>
487 lines
16 KiB
Python
487 lines
16 KiB
Python
from cStringIO import StringIO
|
|
|
|
import logging
|
|
import re
|
|
import shlex
|
|
|
|
from teuthology import misc as teuthology
|
|
from ..orchestra import run
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
def normalize_config(ctx, config):
    """
    Returns a config whose keys are all real roles.
    Generic roles (client, mon, osd, etc.) are replaced with
    the actual roles (client.0, client.1, etc.). If the config
    specifies a different version for a specific role, this is
    unchanged.

    For example, with 3 OSDs this::

         osd:
           tag: v3.0
           kdb: true
         osd.1:
           branch: new_btrfs
           kdb: false
         osd.3:
           deb: /path/to/linux-whatever.deb

    is transformed into::

         osd.0:
           tag: v3.0
           kdb: true
         osd.1:
           branch: new_btrfs
           kdb: false
         osd.2:
           tag: v3.0
           kdb: true
         osd.3:
           deb: /path/to/linux-whatever.deb

    If config is None or just specifies a version to use,
    it is applied to all nodes.

    :param ctx: context object; ``ctx.cluster.remotes`` is read as a mapping
                whose values are the role lists of each host
    :param config: None, a version-only dict (keys drawn from tag, branch,
                   sha1, kdb, deb), or a dict keyed by role / role type
    :returns: a new dict keyed by concrete role names.  In the "all nodes"
              case only the first role of every host is used as a key, and
              every key maps to the same config dict object.
    """
    version_keys = ('tag', 'branch', 'sha1', 'kdb', 'deb')

    # A config made up solely of version keys (or no config at all) is not
    # keyed by role: it describes one kernel for every host.
    if config is None or all(key in version_keys for key in config):
        if config is None:
            config = {'branch': 'master'}
        new_config = {}
        for roles_for_host in ctx.cluster.remotes.values():
            new_config[roles_for_host[0]] = config
        return new_config

    new_config = {}
    for role, role_config in config.items():
        if role_config is None:
            role_config = {'branch': 'master'}
        if '.' in role:
            # already a concrete role such as osd.1
            new_config[role] = role_config
        else:
            for id_ in teuthology.all_roles_of_type(ctx.cluster, role):
                name = '{type}.{id}'.format(type=role, id=id_)
                # specific overrides generic
                if name not in config:
                    new_config[name] = role_config
    return new_config
|
|
|
|
def validate_config(ctx, config):
    """
    Check that all roles sharing a host ask for the same kernel.

    For every host, the first role that has an entry in ``config`` sets the
    kernel for that host.  Any later role on the same host must carry an
    equal entry; such duplicate entries are removed from ``config`` so that
    only one role per host keeps a kernel config.

    :param ctx: context object; ``ctx.cluster.remotes`` is read as a mapping
                whose values are the role lists of each host
    :param config: dict keyed by role name, modified in place
    :raises AssertionError: if two roles on one host specify different kernels
    """
    for roles_for_host in ctx.cluster.remotes.values():
        kernel = None
        for role in roles_for_host:
            # roles without their own entry inherit the host's kernel so far
            role_kernel = config.get(role, kernel)
            if kernel is None:
                kernel = role_kernel
            elif role_kernel is not None:
                assert kernel == role_kernel, \
                    "everything on the same host must use the same kernel"
                # keep a single entry per host
                if role in config:
                    del config[role]
|
|
|
|
def need_to_install(ctx, role, sha1):
    """
    Decide whether the kernel identified by sha1 still has to be installed.

    Runs ``uname -r`` through ``ctx.cluster.only(role)`` and looks at the
    text following the last ``-g`` in the reported version.  If the wanted
    sha1 starts with that text, the kernel is considered already installed.

    :param ctx: context object providing ``ctx.cluster``
    :param role: role whose kernel version is checked
    :param sha1: sha1 (or sha1 prefix source) of the wanted kernel
    :returns: False when the running kernel matches sha1, True otherwise
              (including when the version string carries no ``-g`` part)
    """
    log.info('Checking kernel version of {role}, want {sha1}...'.format(
        role=role,
        sha1=sha1))

    uname_out = StringIO()
    ctx.cluster.only(role).run(args=['uname', '-r'], stdout=uname_out)
    running = uname_out.getvalue().rstrip('\n')

    install_needed = True
    if '-g' not in running:
        log.debug('current kernel version is: {version}, unknown sha1'.format(
            version=running))
    else:
        # everything after the last '-g' is the abbreviated sha1
        running_sha1 = running.rsplit('-g', 1)[1]
        log.debug('current kernel version is: {version} sha1 {sha1}'.format(
            version=running,
            sha1=running_sha1))
        if sha1.startswith(running_sha1):
            log.debug('current sha1 is the same, do not need to install')
            install_needed = False

    uname_out.close()
    return install_needed
|
|
|
|
def install_firmware(ctx, config):
    """
    Bring /lib/firmware/updates up to date on the remote of every role.

    For each role key in config this runs three remote commands: initialise
    a git repository in the firmware directory (removing a ``.git`` whose
    HEAD file is empty first), add the ``origin`` remote if no URL is
    configured yet, and finally fetch and hard-reset to ``origin/master``.

    :param ctx: context object providing ``ctx.cluster``
    :param config: dict keyed by role; only the keys are used
    """
    # upstream alternative:
    # uri = 'git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git'
    uri = 'git://ceph.com/git/linux-firmware.git'
    fw_dir = '/lib/firmware/updates'
    git_dir_opt = '--git-dir=%s/.git' % fw_dir

    for role in config.iterkeys():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Installing linux-firmware on {role}...'.format(role=role))

        init_repo = [
            # kludge around mysterious 0-byte .git/HEAD files
            'cd', fw_dir,
            run.Raw('&&'),
            'test', '-d', '.git',
            run.Raw('&&'),
            'test', '!', '-s', '.git/HEAD',
            run.Raw('&&'),
            'sudo', 'rm', '-rf', '.git',
            run.Raw(';'),
            # init
            'sudo', 'install', '-d', '-m0755', fw_dir,
            run.Raw('&&'),
            'cd', fw_dir,
            run.Raw('&&'),
            'sudo', 'git', 'init',
        ]
        remote.run(args=init_repo)

        # only add the origin remote when none is configured yet
        ensure_origin = [
            'sudo', 'git', git_dir_opt, 'config',
            '--get', 'remote.origin.url', run.Raw('>/dev/null'),
            run.Raw('||'),
            'sudo', 'git', git_dir_opt,
            'remote', 'add', 'origin', uri,
        ]
        remote.run(args=ensure_origin)

        sync_to_master = [
            'cd', fw_dir,
            run.Raw('&&'),
            'sudo', 'git', 'fetch', 'origin',
            run.Raw('&&'),
            'sudo', 'git', 'reset', '--hard', 'origin/master',
        ]
        remote.run(args=sync_to_master)
|
|
|
|
def download_deb(ctx, config):
    """
    Place the kernel package at /tmp/linux-image.deb on each role's remote.

    A config value containing a ``/`` is treated as the path of a local deb
    and is streamed to the remote over stdin; any other value is treated as
    a sha1 whose package URL is looked up with
    ``teuthology.get_ceph_binary_url`` and fetched on the remote with wget.
    All transfers are started without waiting and are then waited for
    together.

    :param ctx: context object providing ``ctx.cluster``
    :param config: dict mapping role to a local deb path or a sha1
    """
    procs = {}
    # Local deb files feed the still-running copy commands, so they can only
    # be closed once those commands have been waited for.
    local_debs = []
    try:
        for role, src in config.items():
            (role_remote,) = ctx.cluster.only(role).remotes.keys()
            if '/' in src:
                # local deb
                log.info('Copying kernel deb {path} to {role}...'.format(
                    path=src,
                    role=role))
                # a .deb is binary data, so read it as bytes
                deb_file = open(src, 'rb')
                local_debs.append(deb_file)
                proc = role_remote.run(
                    args=[
                        'python', '-c',
                        'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
                        '/tmp/linux-image.deb',
                    ],
                    wait=False,
                    stdin=deb_file,
                )
                procs[role_remote.name] = proc
            else:
                log.info('Downloading kernel {sha1} on {role}...'.format(
                    sha1=src,
                    role=role))
                _, deb_url = teuthology.get_ceph_binary_url(
                    package='kernel',
                    sha1=src,
                    format='deb',
                    flavor='basic',
                    arch='x86_64',
                    dist='precise',
                )

                log.info('fetching kernel from {url}'.format(url=deb_url))
                proc = role_remote.run(
                    args=[
                        'sudo', 'rm', '-f', '/tmp/linux-image.deb',
                        run.Raw('&&'),
                        'echo',
                        'linux-image.deb',
                        run.Raw('|'),
                        'wget',
                        '-nv',
                        '-O',
                        '/tmp/linux-image.deb',
                        '--base={url}'.format(url=deb_url),
                        '--input-file=-',
                    ],
                    wait=False,
                )
                procs[role_remote.name] = proc

        for name, proc in procs.items():
            log.debug('Waiting for download/copy to %s to complete...', name)
            proc.exitstatus.get()
    finally:
        for deb_file in local_debs:
            deb_file.close()
|
|
|
|
|
|
def install_and_reboot(ctx, config):
    """
    Install /tmp/linux-image.deb on every role's remote and reboot into it.

    Per role: install the deb with dpkg, read the kernel image name out of
    the package, look up the matching menuentry (and any preceding submenu)
    in /boot/grub/grub.cfg, write /etc/grub.d/01_ceph_kernel so that grub
    defaults to "submenu>entry", run update-grub, delete the deb and issue
    a reboot.  The final command chain is started without waiting; all of
    them are waited for once every role has been processed.

    :param ctx: context object providing ``ctx.cluster``
    :param config: dict mapping role to the kernel source (used for logging)
    """
    deb_path = '/tmp/linux-image.deb'
    grub_script = '/etc/grub.d/01_ceph_kernel'
    grub_script_tmp = '/etc/grub.d/01_ceph_kernel.tmp~'

    procs = {}
    for role, src in config.iteritems():
        log.info('Installing kernel {src} on {role}...'.format(src=src,
                                                               role=role))
        (remote,) = ctx.cluster.only(role).remotes.keys()

        # install the kernel deb
        remote.run(args=['sudo', 'dpkg', '-i', deb_path])

        # collect kernel image name from the .deb
        image_out = StringIO()
        remote.run(
            args=[
                # extract the actual boot image name from the deb
                'dpkg-deb', '--fsys-tarfile', deb_path,
                run.Raw('|'),
                'tar', '-t', '-v', '-f', '-', '--wildcards', '--',
                './boot/vmlinuz-*',
                run.Raw('|'),
                'sed', r'-e s;.*\./boot/vmlinuz-;;',
            ],
            stdout=image_out,
        )
        kernel_title = image_out.getvalue().rstrip()
        image_out.close()
        log.info('searching for kernel {}'.format(kernel_title))

        # look for menuentry for our kernel, and collect any
        # submenu entries for their titles.  Assume that if our
        # kernel entry appears later in the file than a submenu entry,
        # it's actually nested under that submenu.  If it gets more
        # complex this will totally break.
        grub_out = StringIO()
        remote.run(
            args=[
                'egrep',
                '(submenu|menuentry.*' + kernel_title + ').*{',
                '/boot/grub/grub.cfg',
            ],
            stdout=grub_out,
        )
        submenu_title = ''
        default_title = ''
        for line in grub_out.getvalue().split('\n'):
            words = shlex.split(line)
            if len(words) < 2:
                continue
            keyword, title = words[0], words[1]
            if keyword == 'submenu':
                # remember the most recent submenu seen before our entry
                submenu_title = title + '>'
            elif keyword == 'menuentry' and title.endswith(kernel_title):
                default_title = title
                break
        grub_out.close()
        log.info('submenu_title:{}'.format(submenu_title))
        log.info('default_title:{}'.format(default_title))

        # use the title(s) to construct the content of
        # the grub menu entry, so we can default to it.
        script_text = r'cat <<EOF\nset default="' + submenu_title + \
            default_title + r'"\nEOF\n'

        procs[remote.name] = remote.run(
            args=[
                '/bin/echo', '-e', script_text,
                # make it look like an emacs backup file so
                # unfortunately timed update-grub runs don't pick it
                # up yet; use sudo tee so we are able to write to /etc
                run.Raw('|'),
                'sudo', 'tee', '--', grub_script_tmp,
                run.Raw('>/dev/null'),
                run.Raw('&&'),
                'sudo', 'chmod', 'a+x', '--', grub_script_tmp,
                run.Raw('&&'),
                'sudo', 'mv', '--', grub_script_tmp, grub_script,
                # update grub again so it accepts our default
                run.Raw('&&'),
                'sudo', 'update-grub',
                run.Raw('&&'),
                'rm', deb_path,
                run.Raw('&&'),
                'sudo', 'shutdown', '-r', 'now',
            ],
            wait=False,
        )

    for name, proc in procs.iteritems():
        log.debug('Waiting for install on %s to complete...', name)
        proc.exitstatus.get()
|
|
|
|
def enable_disable_kdb(ctx, config):
    """
    Switch kdb on or off on the remote of every role in config.

    Enabling writes the serial device name to
    /sys/module/kgdboc/parameters/kgdboc (ttyS2 when the remote's name
    contains "mira", ttyS1 otherwise); disabling writes an empty string to
    the same file.

    :param ctx: context object providing ``ctx.cluster``
    :param config: dict mapping role to a truthy (enable) or falsy (disable)
                   value
    """
    kgdboc_param = '/sys/module/kgdboc/parameters/kgdboc'

    for role, enable in config.iteritems():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        serialdev = "ttyS2" if "mira" in remote.name else "ttyS1"

        if enable:
            log.info('Enabling kdb on {role}...'.format(role=role))
            value = serialdev
        else:
            log.info('Disabling kdb on {role}...'.format(role=role))
            value = ''

        remote.run(
            args=[
                'echo', value,
                run.Raw('|'),
                'sudo', 'tee', kgdboc_param,
            ])
|
|
|
|
def wait_for_reboot(ctx, need_install, timeout):
    """
    Loop reconnecting and checking kernel versions until
    they're all correct or the timeout is exceeded.

    :param ctx: context object passed on to ``teuthology.reconnect`` and
                ``need_to_install``
    :param need_install: dict mapping role to the sha1 expected after the
                         reboot; entries are removed in place as each role
                         is confirmed, so it is empty on successful return
    :param timeout: seconds after which a failing check is re-raised
                    instead of retried
    :raises AssertionError: if a role still runs the wrong kernel once the
                            timeout has passed (other errors from the check
                            are re-raised the same way)
    """
    import time
    starttime = time.time()
    while need_install:
        teuthology.reconnect(ctx, timeout)
        # iterate over a snapshot of the keys: confirmed roles are deleted
        # from need_install inside the loop
        for client in list(need_install):
            log.info('Checking client {client} for new kernel version...'.format(client=client))
            try:
                # raise explicitly rather than with an assert statement so
                # the check is not removed when Python runs with -O
                if need_to_install(ctx, client, need_install[client]):
                    raise AssertionError(
                        'failed to install new kernel version within timeout')
                del need_install[client]
            except Exception:
                # ignore connection resets and asserts while time is left;
                # KeyboardInterrupt/SystemExit are deliberately not caught
                if time.time() - starttime > timeout:
                    raise
        time.sleep(1)
|
|
|
|
|
|
def task(ctx, config):
    """
    Make sure the specified kernel is installed.
    This can be a branch, tag, or sha1 of ceph-client.git.

    To install the kernel from the master branch on all hosts::

        kernel:
        tasks:
        - ceph:

    To wait 5 minutes for hosts to reboot::

        kernel:
          timeout: 300
        tasks:
        - ceph:

    To specify different kernels for each client::

        kernel:
          client.0:
            branch: foo
          client.1:
            tag: v3.0rc1
          client.2:
            sha1: db3540522e955c1ebb391f4f5324dff4f20ecd09
        tasks:
        - ceph:

    You can specify a branch, tag, or sha1 for all roles
    of a certain type (more specific roles override this)::

        kernel:
          client:
            tag: v3.0
          osd:
            branch: btrfs_fixes
          client.1:
            branch: more_specific_branch
          osd.3:
            branch: master

    To enable kdb::

        kernel:
          kdb: true

    A role may also give ``deb: /path/to/some.deb`` to install a local
    package.  The path must contain ``<digits>-g<7 characters>`` so that the
    abbreviated sha1 can be read from it; otherwise the task fails.

    :param ctx: context object (``ctx.cluster`` and ``ctx.summary`` are used)
    :param config: None or a dict as described above; a ``timeout`` key, if
                   present, is removed from the dict (default 300 seconds)
    :raises AssertionError: if config is neither None nor a dict, or if no
                            sha1 can be extracted from a deb path
    """
    assert config is None or isinstance(config, dict), \
        "task kernel only supports a dictionary for configuration"

    timeout = 300
    if config is not None and 'timeout' in config:
        timeout = config.pop('timeout')

    config = normalize_config(ctx, config)
    validate_config(ctx, config)
    log.info('config %s', config)

    need_install = {}  # sha1 to dl, or path to deb
    need_sha1 = {}  # sha1
    kdb = {}
    for role, role_config in config.items():
        path = role_config.get('deb')
        if path:
            match = re.search(r'\d+-g(\w{7})', path)
            if match:
                sha1 = match.group(1)
                log.info('kernel deb sha1 appears to be %s', sha1)
                if need_to_install(ctx, role, sha1):
                    need_install[role] = path
                    need_sha1[role] = sha1
            else:
                # Without a sha1 the post-reboot check has nothing to compare
                # against, so fail explicitly.  A plain "assert False" would
                # be stripped under -O and silently skip the install.
                log.error('unable to extract sha1 from deb path %s', path)
                raise AssertionError(
                    'unable to extract sha1 from deb path {path}'.format(
                        path=path))
        else:
            sha1, _ = teuthology.get_ceph_binary_url(
                package='kernel',
                branch=role_config.get('branch'),
                tag=role_config.get('tag'),
                sha1=role_config.get('sha1'),
                flavor='basic',
                format='deb',
                dist='precise',
                arch='x86_64',
            )
            log.debug('sha1 for {role} is {sha1}'.format(role=role, sha1=sha1))
            ctx.summary['{role}-kernel-sha1'.format(role=role)] = sha1
            if need_to_install(ctx, role, sha1):
                need_install[role] = sha1
                need_sha1[role] = sha1

        # enable or disable kdb if specified, otherwise do not touch
        if role_config.get('kdb') is not None:
            kdb[role] = role_config.get('kdb')

    if need_install:
        install_firmware(ctx, need_install)
        download_deb(ctx, need_install)
        install_and_reboot(ctx, need_install)
        wait_for_reboot(ctx, need_sha1, timeout)

    enable_disable_kdb(ctx, kdb)
|