ceph/qa/tasks/rbd_pwl_cache_recovery.py
Ilya Dryomov 2de0574382 qa/tasks: rename persistent write log cache thrash task
It doesn't really thrash anything, just repeatedly restarts the
workload on top of a dirty cache file.  rbd_pwl_cache_recovery is
more on point and gets covered by existing CODEOWNERS.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2022-07-16 09:46:58 +02:00

97 lines
3.1 KiB
Python

"""
persistent write log cache recovery task
"""
import contextlib
import logging
import random
import json
import time
from teuthology import misc as teuthology
from teuthology import contextutil
log = logging.getLogger(__name__)

# Number of start/kill cycles to run when the task config does not
# provide 'num_iterations'.
DEFAULT_NUM_ITERATIONS = 20

# Candidate values for 'rbd bench --io-pattern'; one is picked at random
# for every cycle.
IO_PATTERNS = (
    "full-seq",
    "rand",
)

# Candidate values for 'rbd bench --io-size'; one is picked at random
# for every cycle.
IO_SIZES = (
    '4K',
    '16K',
    '128K',
    '1024K',
)
@contextlib.contextmanager
def thrashes_rbd_bench_on_persistent_cache(ctx, config):
    """
    Repeatedly start an ``rbd bench`` write workload and kill it, checking
    after every kill that a dirty persistent write log cache is left behind.

    One cycle does the following on the client's remote:

    1. start ``rbd bench`` in the background with a random I/O pattern
       and I/O size,
    2. sleep 10 seconds, then dump ``rbd status`` for the image,
    3. sleep a random 10-60 seconds, then ``killall -9 rbd``,
    4. sleep 40 seconds, then assert that ``rbd status`` reports no
       watchers and a persistent cache that is present, not empty and
       not clean,
    5. check that the reported cache file path exists.

    All cycles run before the context manager yields; on exit it only
    logs "cleanup".

    :param ctx: teuthology context; only ``ctx.cluster`` is used here.
    :param config: dict mapping a client role to its options (a dict or
                   None).  Only the first entry is used.  Recognized
                   options are ``image_name`` (default ``'testimage'``)
                   and ``num_iterations`` (default
                   ``DEFAULT_NUM_ITERATIONS``).
    """
    log.info("thrashes rbd bench on persistent write log cache")

    # Only the first (client, options) pair of the config is exercised.
    client, client_config = list(config.items())[0]
    [remote] = ctx.cluster.only(client).remotes.keys()
    if client_config is None:
        client_config = {}
    image_name = client_config.get('image_name', 'testimage')
    num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS)

    def query_status():
        # Return the raw JSON text printed by 'rbd status' for the image.
        return remote.sh(['rbd', 'status', image_name, '--format=json'])

    for _ in range(num_iterations):
        log.info("start rbd bench")
        io_pattern = random.choice(IO_PATTERNS)
        io_size = random.choice(IO_SIZES)
        # rbd bench cannot be given a run time, so request a total I/O
        # amount large enough that it is still running when it gets killed.
        bench_cmd = [
            'rbd', 'bench',
            '--io-type', 'write',
            '--io-pattern', io_pattern,
            '--io-size', io_size,
            '--io-total', '100G',
            image_name,
        ]
        remote.run(args=bench_cmd, wait=False)

        # Give the rbd bench process a few seconds to start up and
        # finish initializing the pwl cache.
        time.sleep(10)

        log.info("dump cache state when rbd bench running.")
        query_status()

        log.info("sleep...")
        time.sleep(random.randint(10, 60))

        log.info("rbd bench crash.")
        # A non-zero exit status from killall is tolerated.
        kill_cmd = ['killall', '-9', 'rbd']
        remote.run(args=kill_cmd, check_status=False)

        log.info("wait for watch timeout.")
        time.sleep(40)

        log.info("check cache state after crash.")
        status = json.loads(query_status())
        assert len(status['watchers']) == 0
        cache_state = status['persistent_cache']
        # The killed writer must have left a cache that exists and still
        # holds unflushed data.
        expected_flags = (
            ('present', True),
            ('empty', False),
            ('clean', False),
        )
        for flag, expected in expected_flags:
            assert cache_state[flag] == expected

        log.info("check dirty cache file.")
        remote.run(args=['test', '-e', cache_state['path']])

    try:
        yield
    finally:
        log.info("cleanup")
@contextlib.contextmanager
def task(ctx, config):
    """
    Entry point of the persistent write log cache recovery task.

    The config is passed through ``teuthology.replace_all_with_clients``
    and then handed to ``thrashes_rbd_bench_on_persistent_cache``, whose
    context is entered before this task yields.

    :param ctx: teuthology context; ``ctx.cluster`` is used to resolve
                the clients.
    :param config: task configuration; must be a dict, otherwise an
                   AssertionError is raised.
    """
    assert isinstance(config, dict), \
        "task rbd_pwl_cache_recovery only supports a dictionary for configuration"

    per_client_config = teuthology.replace_all_with_clients(ctx.cluster, config)

    def start_recovery_test():
        # Factory called by contextutil.nested() to create the manager.
        return thrashes_rbd_bench_on_persistent_cache(
            ctx=ctx,
            config=per_client_config,
        )

    with contextutil.nested(start_recovery_test):
        yield