ceph-disk: workaround gperftool hang

Temporary workaround: if ceph-osd --mkfs does not
complete within 5 minutes, assume it is blocked
because of https://github.com/gperftools/gperftools/issues/786
and retry a few times.

References http://tracker.ceph.com/issues/13522

Signed-off-by: Loic Dachary <loic@dachary.org>
This commit is contained in:
Loic Dachary 2016-05-26 12:55:51 +02:00
parent 136efee7a5
commit c092321c24
2 changed files with 60 additions and 4 deletions

View File

@ -2642,6 +2642,36 @@ class PrepareBluestoreData(PrepareData):
write_one_line(path, 'type', 'bluestore')
#
# Temporary workaround: if ceph-osd --mkfs does not
# complete within 5 minutes, assume it is blocked
# because of http://tracker.ceph.com/issues/13522
# and retry a few times.
#
# Replace calls to this function with command_check_call
# when http://tracker.ceph.com/issues/13522 is fixed
#
def ceph_osd_mkfs(arguments):
    """Run a ceph-osd --mkfs command under ``timeout``, retrying on timeouts.

    The command in *arguments* is prefixed with the ``timeout`` executable
    and one duration taken from the whitespace-separated
    ``CEPH_OSD_MKFS_DELAYS`` environment variable (default: five values
    of ``300``). Each duration allows one attempt.

    :param arguments: the command line (a list) to run under ``timeout``
    :raises Error: if no attempt succeeded; the message carries the
        output of the last failed attempt, or ``unknown error`` when no
        attempt was made at all
    """
    timeout_cmd = _get_command_executable(['timeout'])
    delays = os.environ.get('CEPH_OSD_MKFS_DELAYS',
                            '300 300 300 300 300').split()
    last_error = 'unknown error'
    for delay in delays:
        try:
            _check_output(timeout_cmd + [delay] + arguments)
        except subprocess.CalledProcessError as e:
            last_error = e.output
            if e.returncode != 124:
                # a genuine failure, not a timeout: retrying is pointless
                break
            # exit status 124 means the timeout fired, try the next delay
            LOG.debug('%s timed out : %s (retry)'
                      % (str(arguments), last_error))
        else:
            # the command completed within the allotted delay
            return
    raise Error('%s failed : %s' % (str(arguments), last_error))
def mkfs(
path,
cluster,
@ -2663,7 +2693,7 @@ def mkfs(
osd_type = read_one_line(path, 'type')
if osd_type == 'bluestore':
command_check_call(
ceph_osd_mkfs(
[
'ceph-osd',
'--cluster', cluster,
@ -2679,7 +2709,7 @@ def mkfs(
],
)
else:
command_check_call(
ceph_osd_mkfs(
[
'ceph-osd',
'--cluster', cluster,

View File

@ -1,7 +1,7 @@
#!/bin/bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
# Copyright (C) 2014, 2015, 2016 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
@ -153,7 +153,7 @@ function tweak_path() {
command_fixture ceph-conf || return 1
command_fixture ceph-osd || return 1
test_activate_dir
test_activate_dir || return 1
[ ! -f $DIR/used-ceph-conf ] || return 1
[ ! -f $DIR/used-ceph-osd ] || return 1
@ -357,6 +357,31 @@ function test_keyring_path() {
grep --quiet "keyring $DIR/bootstrap-osd/ceph.keyring" $DIR/test_keyring || return 1
}
# http://tracker.ceph.com/issues/13522
function ceph_osd_fail_once_fixture() {
    # Install a fake ceph-osd in $DIR. The first time it is called with
    # --mkfs it records the call in $DIR/used-ceph-osd and sleeps for 600
    # seconds; every other call is handed over to ../ceph-osd.
    local command=ceph-osd
    local fpath=$(readlink -f $(which $command))

    # Give up unless the ceph-osd found in the PATH resolves to
    # ../ceph-osd or to ceph-osd in the current directory.
    if ! [ "$fpath" = $(readlink -f ../$command) ] && ! [ "$fpath" = $(readlink -f $(pwd)/$command) ] ; then
        return 1
    fi

    # \$@ is escaped so that it is expanded by the generated script,
    # while $DIR and $command are expanded now.
    cat > $DIR/$command <<EOF
#!/bin/bash
if echo "\$@" | grep -e --mkfs && ! test -f $DIR/used-$command ; then
touch $DIR/used-$command
# sleep longer than the first CEPH_OSD_MKFS_DELAYS value (5) below
sleep 600
else
exec ../$command "\$@"
fi
EOF
    chmod +x $DIR/$command
}
function test_ceph_osd_mkfs() {
    # Check that a ceph-osd --mkfs exceeding the first delay is retried:
    # the fixture makes the first --mkfs call sleep for longer than the
    # 5 given as the first CEPH_OSD_MKFS_DELAYS value.
    if ! ceph_osd_fail_once_fixture ; then
        return 1
    fi
    if ! CEPH_OSD_MKFS_DELAYS='5 300 300' use_path test_activate_dir ; then
        return 1
    fi
    # the marker file shows that the fake ceph-osd was actually called
    test -f $DIR/used-ceph-osd || return 1
}
function run() {
local default_actions
default_actions+="test_path "
@ -369,6 +394,7 @@ function run() {
default_actions+="test_mark_init "
default_actions+="test_zap "
default_actions+="test_activate_dir_bluestore "
default_actions+="test_ceph_osd_mkfs "
local actions=${@:-$default_actions}
local status
for action in $actions ; do