mirror of
https://github.com/ceph/ceph
synced 2024-12-18 09:25:49 +00:00
ceph-disk: workaround gperftool hang
Temporary workaround: if ceph-osd --mkfs does not complete within 5 minutes, assume it is blocked because of https://github.com/gperftools/gperftools/issues/786. References: http://tracker.ceph.com/issues/13522. Signed-off-by: Loic Dachary <loic@dachary.org>
This commit is contained in:
parent
136efee7a5
commit
c092321c24
@ -2642,6 +2642,36 @@ class PrepareBluestoreData(PrepareData):
|
||||
write_one_line(path, 'type', 'bluestore')
|
||||
|
||||
|
||||
#
|
||||
# Temporary workaround: if ceph-osd --mkfs does not
|
||||
# complete within 5 minutes, assume it is blocked
|
||||
# because of http://tracker.ceph.com/issues/13522
|
||||
# and retry a few times.
|
||||
#
|
||||
# Replace the calls to this function with command_check_call
|
||||
# when http://tracker.ceph.com/issues/13522 is fixed
|
||||
#
|
||||
def ceph_osd_mkfs(arguments):
    """Run a ``ceph-osd --mkfs`` command under ``timeout``, retrying on hangs.

    Temporary workaround for http://tracker.ceph.com/issues/13522: when
    the command does not complete within the allotted delay it is
    assumed to be blocked and is started again.

    The delay of each attempt is taken from the whitespace separated
    CEPH_OSD_MKFS_DELAYS environment variable (default: five attempts
    of 300 each) and handed verbatim to the ``timeout`` command.

    :param arguments: the ceph-osd command line, as a list of strings
    :raises Error: if the command fails for a reason other than a
                   timeout, or if every attempt timed out
    """
    default_delays = '300 300 300 300 300'
    # presumably a list holding the resolved timeout executable, since
    # it is concatenated with the other argument lists below
    timeout = _get_command_executable(['timeout'])
    # A variable that is set but blank would yield no delay at all:
    # mkfs would never be attempted and 'unknown error' would be
    # reported. Fall back to the defaults in that case.
    delays = (os.environ.get('CEPH_OSD_MKFS_DELAYS', default_delays).split() or
              default_delays.split())
    error = 'unknown error'
    for delay in delays:
        try:
            _check_output(timeout + [delay] + arguments)
        except subprocess.CalledProcessError as e:
            error = e.output
            if e.returncode != 124:
                # genuine mkfs failure: retrying would not help
                break
            # 124 means the timeout fired: try again with the next delay
            LOG.debug('%s timed out : %s (retry)', str(arguments), error)
        else:
            return
    raise Error('%s failed : %s' % (str(arguments), error))
|
||||
|
||||
|
||||
def mkfs(
|
||||
path,
|
||||
cluster,
|
||||
@ -2663,7 +2693,7 @@ def mkfs(
|
||||
osd_type = read_one_line(path, 'type')
|
||||
|
||||
if osd_type == 'bluestore':
|
||||
command_check_call(
|
||||
ceph_osd_mkfs(
|
||||
[
|
||||
'ceph-osd',
|
||||
'--cluster', cluster,
|
||||
@ -2679,7 +2709,7 @@ def mkfs(
|
||||
],
|
||||
)
|
||||
else:
|
||||
command_check_call(
|
||||
ceph_osd_mkfs(
|
||||
[
|
||||
'ceph-osd',
|
||||
'--cluster', cluster,
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
|
||||
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
|
||||
# Copyright (C) 2014, 2015, 2016 Red Hat <contact@redhat.com>
|
||||
#
|
||||
# Author: Loic Dachary <loic@dachary.org>
|
||||
#
|
||||
@ -153,7 +153,7 @@ function tweak_path() {
|
||||
command_fixture ceph-conf || return 1
|
||||
command_fixture ceph-osd || return 1
|
||||
|
||||
test_activate_dir
|
||||
test_activate_dir || return 1
|
||||
|
||||
[ ! -f $DIR/used-ceph-conf ] || return 1
|
||||
[ ! -f $DIR/used-ceph-osd ] || return 1
|
||||
@ -357,6 +357,31 @@ function test_keyring_path() {
|
||||
grep --quiet "keyring $DIR/bootstrap-osd/ceph.keyring" $DIR/test_keyring || return 1
|
||||
}
|
||||
|
||||
# http://tracker.ceph.com/issues/13522
|
||||
function ceph_osd_fail_once_fixture() {
    # Install a fake ceph-osd in $DIR that hangs on its first --mkfs
    # invocation (recorded by the $DIR/used-ceph-osd marker) and hands
    # every other invocation over to the real ceph-osd.
    local command=ceph-osd
    local fpath
    fpath=$(readlink -f "$(which $command)")
    # Only wrap the ceph-osd found in the parent or current directory.
    # The substitutions are quoted so that an empty readlink result is
    # a plain mismatch instead of a malformed [ expression.
    [ "$fpath" = "$(readlink -f ../$command)" ] || [ "$fpath" = "$(readlink -f "$(pwd)/$command")" ] || return 1

    # $DIR, $command and $fpath are expanded now, \$@ when the
    # generated script runs. grep is --quiet so the arguments are not
    # echoed in front of the real ceph-osd output, and the real binary
    # is called by its resolved path so that it is found whichever of
    # the two locations above matched and whatever the caller's
    # current directory is.
    cat > $DIR/$command <<EOF
#!/bin/bash
if echo "\$@" | grep --quiet -e --mkfs && ! test -f $DIR/used-$command ; then
   touch $DIR/used-$command
   # sleep longer than the first CEPH_OSD_MKFS_DELAYS value (5) below
   sleep 600
else
   exec "$fpath" "\$@"
fi
EOF
    chmod +x $DIR/$command
}
|
||||
|
||||
function test_ceph_osd_mkfs() {
    # Activate a directory with a ceph-osd that hangs on its first
    # --mkfs call and a first delay of 5, then verify that the hanging
    # wrapper was really used by looking for the marker it leaves.
    if ! ceph_osd_fail_once_fixture ; then
        return 1
    fi
    if ! CEPH_OSD_MKFS_DELAYS='5 300 300' use_path test_activate_dir ; then
        return 1
    fi
    test -f $DIR/used-ceph-osd || return 1
}
|
||||
|
||||
function run() {
|
||||
local default_actions
|
||||
default_actions+="test_path "
|
||||
@ -369,6 +394,7 @@ function run() {
|
||||
default_actions+="test_mark_init "
|
||||
default_actions+="test_zap "
|
||||
default_actions+="test_activate_dir_bluestore "
|
||||
default_actions+="test_ceph_osd_mkfs "
|
||||
local actions=${@:-$default_actions}
|
||||
local status
|
||||
for action in $actions ; do
|
||||
|
Loading…
Reference in New Issue
Block a user