mirror of
https://github.com/ceph/ceph
synced 2025-01-15 23:43:06 +00:00
bd6e3e5f1f
I'm seeing sporadic single thread deadlocks on fio stat_mutex during krbd thrash runs: (gdb) info threads Id Target Id Frame * 1 Thread 0x7f89ee730740 (LWP 15604) 0x00007f89ed9f41bd in __lll_lock_wait () from /lib64/libpthread.so.0 (gdb) bt #0 0x00007f89ed9f41bd in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00007f89ed9f17b2 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 #2 0x00000000004429b9 in fio_mutex_down (mutex=0x7f89ee72d000) at mutex.c:170 #3 0x0000000000459704 in thread_main (data=<optimized out>) at backend.c:1639 #4 0x000000000045b013 in fork_main (offset=0, shmid=<optimized out>, sk_out=0x0) at backend.c:1778 #5 run_threads (sk_out=sk_out@entry=0x0) at backend.c:2195 #6 0x000000000045b47f in fio_backend (sk_out=sk_out@entry=0x0) at backend.c:2400 #7 0x000000000040cb0c in main (argc=2, argv=0x7fffad3e3888, envp=<optimized out>) at fio.c:63 (gdb) up 2 170 pthread_cond_wait(&mutex->cond, &mutex->lock); (gdb) p mutex.lock.__data.__owner $1 = 15604 Upgrading to 2.21 seems to make these go away. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
227 lines
9.3 KiB
Python
227 lines
9.3 KiB
Python
"""
|
|
Long running fio tests on rbd mapped devices for format/features provided in config
|
|
Many fio parameters can be configured so that this task can be used along with thrash/power-cut tests
|
|
and exercise IO on full disk for all format/features
|
|
- This test should not be run on a VM because of its heavy resource usage
|
|
|
|
"""
|
|
import contextlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import StringIO
|
|
|
|
from teuthology.parallel import parallel
|
|
from teuthology import misc as teuthology
|
|
from tempfile import NamedTemporaryFile
|
|
from teuthology.orchestra import run
|
|
from teuthology.packaging import install_package, remove_package
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
|
|
@contextlib.contextmanager
def task(ctx, config):
    """
    Create rbd images (and, unless the rbd ioengine is used, mapped devices)
    and exercise IO with fio for the formats/features given in the config.

    The config can be given per client::

        client.0:
           fio-io-size: 100g or 80% or 100m
           fio-version: 2.2.9
           formats: [2]
           features: [[layering],[striping],[layering,exclusive-lock,object-map]]
           test-clone-io: 1  #remove this option to not run create rbd clone and not run io on clone
           io-engine: "sync or rbd or any io-engine"
           rw: randrw
        client.1:
           fio-io-size: 100g
           fio-version: 2.2.9
           rw: read
           image_size: 20480

    or once for all clients::

        all:
           fio-io-size: 400g
           rw: randrw
           formats: [2]
           features: [[layering],[striping]]
           io-engine: libaio

    :param ctx: teuthology run context; only ``ctx.cluster`` and the test
                directory derived from it are used here
    :param config: dict with either an ``all`` entry or one entry per
                   client role
    """
    # A truthy 'all' section applies to every client remote. It is kept in
    # its own variable: the per-client loop below must not be able to turn
    # a role name into the "shared" config for the next remote.
    shared_config = config.get('all')
    clients = ctx.cluster.only(teuthology.is_type('client'))
    rbd_test_dir = teuthology.get_testdir(ctx) + "/rbd_fio_test"

    # NOTE(review): every spawn sits in its own parallel() context, so each
    # fio job is waited for before the next one starts. Confirm whether the
    # jobs were meant to overlap across clients before hoisting the context.
    for remote, roles in clients.remotes.iteritems():
        if shared_config:
            with parallel() as p:
                p.spawn(run_fio, remote, shared_config, rbd_test_dir)
            continue
        for role_name in config:
            if role_name in roles:
                with parallel() as p:
                    p.spawn(run_fio, remote, config[role_name], rbd_test_dir)

    yield
|
|
|
|
|
|
def get_ioengine_package_name(ioengine, remote):
    """
    Return the development package that must be installed on ``remote``
    before fio can be built with ``ioengine``, or None when the engine
    needs no extra package.

    :param ioengine: fio ioengine name (only 'rbd' and 'libaio' map to a
                     package)
    :param remote: remote whose system type ('rpm' or other) selects the
                   package naming scheme
    """
    on_rpm = teuthology.get_system_type(remote) == 'rpm'
    # (ioengine, rpm package name, package name on every other system type)
    known_packages = (
        ('rbd', 'librbd1-devel', 'librbd-dev'),
        ('libaio', 'libaio-devel', 'libaio-dev'),
    )
    for engine, rpm_pkg, other_pkg in known_packages:
        if ioengine == engine:
            return rpm_pkg if on_rpm else other_pkg
    return None
|
|
|
|
|
|
def run_rbd_map(remote, image, iodepth):
    """
    Map ``image`` with ``rbd map`` on ``remote`` and size its request queue.

    The queue depth passed to ``rbd map`` and the value written to the
    device's ``nr_requests`` sysfs file are both ``iodepth``, raised to at
    least 128.

    :returns: the output of ``rbd map`` with trailing newlines removed
              (used by the caller as the device path)
    """
    # 128 is RBD_QUEUE_DEPTH_DEFAULT; never configure less than that.
    queue_depth = max(iodepth, 128)
    map_cmd = ['sudo', 'rbd', 'map',
               '-o', 'queue_depth={}'.format(queue_depth),
               image]
    captured = StringIO.StringIO()
    remote.run(args=map_cmd, stdout=captured)
    dev = captured.getvalue().rstrip('\n')

    nr_requests_path = '/sys/block/{}/queue/nr_requests'.format(
        os.path.basename(dev))
    teuthology.sudo_write_file(remote, nr_requests_path, str(queue_depth))
    return dev
|
|
|
|
|
|
def _write_fio_job(fio_config, target, rw, target_key):
    """Append one fio job section, named after ``target``, to the job file."""
    fio_config.write('[{name}]\n'.format(name=target))
    fio_config.write('rw={rw}\n'.format(rw=rw))
    fio_config.write('{key}={target}\n'.format(key=target_key, target=target))


def _create_rbd_clone(remote, rbd_snap_name, rbd_clone_name):
    """Snapshot an image, protect the snapshot and clone it."""
    log.info("Testing clones using fio")
    remote.run(args=['rbd', 'snap', 'create', rbd_snap_name])
    remote.run(args=['rbd', 'snap', 'protect', rbd_snap_name])
    remote.run(args=['rbd', 'clone', rbd_snap_name, rbd_clone_name])


def _unmap_rbd_devices(remote, sn):
    """Unmap every device that ``rbd showmapped`` reports on ``remote``."""
    out = StringIO.StringIO()
    remote.run(args=['rbd', 'showmapped', '--format=json'], stdout=out)
    mapped_images = json.loads(out.getvalue())
    if not mapped_images:
        return
    # The JSON is either a dict of entries keyed by id or a plain list of
    # entries; accept both so cleanup does not fail on the list form.
    if isinstance(mapped_images, dict):
        mapped_images = mapped_images.values()
    log.info("Unmapping rbd images on {sn}".format(sn=sn))
    for image in mapped_images:
        remote.run(args=['sudo', 'rbd', 'unmap', str(image['device'])])


def run_fio(remote, config, rbd_test_dir):
    """
    Create the rbd images, generate a fio job file for them, build fio
    from a GitHub release tarball on ``remote`` and run it on the job file.

    Config keys read (value used when the key is missing or falsy):

    - io-engine: fio ioengine ('sync')
    - bs: block size ('4k')
    - io-depth: fio iodepth, also used for the mapped device queue (2,
      only when the key is missing)
    - fio-io-size: fio size ('100m')
    - runtime: seconds (1800)
    - image_size (or image-size): value passed to ``rbd create --size``
      (10240)
    - formats: image formats ([1, 2])
    - features: list of feature lists
      ([['layering'], ['striping'], ['exclusive-lock', 'object-map']])
    - fio-version: fio release to download ('2.21')
    - test-clone-io: when truthy, also clone every image and run IO on
      the clone
    - rw: fio rw mode ('randrw')

    With the 'rbd' ioengine fio addresses the images by name; with any
    other engine each image is mapped through run_rbd_map() and fio is
    pointed at the resulting device.

    :param remote: teuthology remote to run everything on
    :param config: dict of the options listed above
    :param rbd_test_dir: remote directory used to download and build fio;
                         removed again at the end
    """
    # Resolve every option up front so that each name is bound no matter
    # which keys the config contains. ioengine and rw in particular are
    # used well after the point where they are written to the job file.
    ioengine = config.get('io-engine') or 'sync'
    rw = config.get('rw') or 'randrw'
    iodepth = config.get('io-depth', 2)
    # 'image_size' is the historical key; 'image-size' matches the
    # spelling of the other options and is accepted as a fallback.
    image_size = config.get('image_size') or config.get('image-size') or 10240
    formats = config.get('formats') or [1, 2]
    features = config.get('features') or [
        ['layering'], ['striping'], ['exclusive-lock', 'object-map']]
    fio_version = config.get('fio-version') or '2.21'
    test_clone_io = config.get('test-clone-io')

    fio_config = NamedTemporaryFile(prefix='fio_rbd_', dir='/tmp/', delete=False)
    fio_config.write('[global]\n')
    fio_config.write('ioengine={ioe}\n'.format(ioe=ioengine))
    fio_config.write('bs={bs}\n'.format(bs=config.get('bs') or '4k'))
    fio_config.write('iodepth={iod}\n'.format(iod=iodepth))
    fio_config.write('size={size}\n'.format(
        size=config.get('fio-io-size') or '100m'))
    fio_config.write('time_based\n')
    fio_config.write('runtime={runtime}\n'.format(
        runtime=config.get('runtime') or 1800))
    fio_config.write('allow_file_create=0\n')

    # handle package required for ioengine, if any
    sn = remote.shortname
    ioengine_pkg = get_ioengine_package_name(ioengine, remote)
    if ioengine_pkg:
        install_package(ioengine_pkg, remote)

    fio_config.write('norandommap\n')
    if ioengine == 'rbd':
        fio_config.write('clientname=admin\n')
        fio_config.write('pool=rbd\n')
        fio_config.write('invalidate=0\n')
    elif ioengine == 'libaio':
        fio_config.write('direct=1\n')

    # Any engine other than 'rbd' does IO on a mapped block device.
    use_mapped_dev = ioengine != 'rbd'
    target_key = 'filename' if use_mapped_dev else 'rbdname'

    for frmt in formats:
        for feature in features:
            log.info("Creating rbd images on {sn}".format(sn=sn))
            feature_name = '-'.join(feature)
            rbd_name = 'i{i}f{f}{sn}'.format(i=frmt, f=feature_name, sn=sn)
            rbd_snap_name = 'i{i}f{f}{sn}@i{i}f{f}{sn}Snap'.format(
                i=frmt, f=feature_name, sn=sn)
            rbd_clone_name = 'i{i}f{f}{sn}Clone'.format(
                i=frmt, f=feature_name, sn=sn)
            create_args = ['rbd', 'create',
                           '--size', '{size}'.format(size=image_size),
                           '--image', rbd_name,
                           '--image-format', '{f}'.format(f=frmt)]
            # Plain loop: map() used only for its side effect does nothing
            # where map() is lazy.
            for image_feature in feature:
                create_args.extend(['--image-feature', image_feature])
            remote.run(args=create_args)
            remote.run(args=['rbd', 'info', rbd_name])

            # fio targets for this image, in job-file order: the image
            # itself first, then its clone when clone IO is requested.
            if use_mapped_dev:
                targets = [run_rbd_map(remote, rbd_name, iodepth)]
            else:
                targets = [rbd_name]
            if test_clone_io:
                _create_rbd_clone(remote, rbd_snap_name, rbd_clone_name)
                if use_mapped_dev:
                    targets.append(
                        run_rbd_map(remote, rbd_clone_name, iodepth))
                else:
                    targets.append(rbd_clone_name)
            for target in targets:
                _write_fio_job(fio_config, target, rw, target_key)

    fio_config.close()
    # The job file is placed at the same path on the remote.
    remote.put_file(fio_config.name, fio_config.name)
    try:
        log.info("Running rbd feature - fio test on {sn}".format(sn=sn))
        fio = "https://github.com/axboe/fio/archive/fio-" + fio_version + ".tar.gz"
        remote.run(args=['mkdir', run.Raw(rbd_test_dir),])
        # 'cd' and './configure' must be separate commands; without the
        # ';' in between, configure is just an extra argument to cd and
        # never runs.
        remote.run(args=['cd', run.Raw(rbd_test_dir),
                         run.Raw(';'), 'wget', fio,
                         run.Raw(';'), run.Raw('tar -xvf fio*tar.gz'),
                         run.Raw(';'), run.Raw('cd fio-fio*'),
                         run.Raw(';'), './configure',
                         run.Raw(';'), 'make'])
        remote.run(args=['ceph', '-s'])
        remote.run(args=[run.Raw('{tdir}/fio-fio-{v}/fio --showcmd {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))])
        remote.run(args=['sudo', run.Raw('{tdir}/fio-fio-{v}/fio {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))])
        remote.run(args=['ceph', '-s'])
    finally:
        _unmap_rbd_devices(remote, sn)
        log.info("Cleaning up fio install")
        remote.run(args=['rm','-rf', run.Raw(rbd_test_dir)])
        if ioengine_pkg:
            remove_package(ioengine_pkg, remote)
|