mirror of
https://github.com/ceph/ceph
synced 2025-01-04 10:12:30 +00:00
2de0574382
It doesn't really thrash anything, just repeatedly restarts the workload on top of a dirty cache file. rbd_pwl_cache_recovery is more on point and gets covered by existing CODEOWNERS. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
97 lines
3.1 KiB
Python
97 lines
3.1 KiB
Python
"""
Persistent write log (PWL) cache recovery task.
"""
|
|
import contextlib
|
|
import logging
|
|
import random
|
|
import json
|
|
import time
|
|
|
|
from teuthology import misc as teuthology
|
|
from teuthology import contextutil
|
|
|
|
# Number of crash/verify iterations to run when the client config does not
# provide 'num_iterations'.
DEFAULT_NUM_ITERATIONS = 20
# Candidate values for `rbd bench --io-pattern`; one is chosen at random for
# each iteration.
IO_PATTERNS = ("full-seq", "rand")
# Candidate values for `rbd bench --io-size`; one is chosen at random for
# each iteration.
IO_SIZES = ('4K', '16K', '128K', '1024K')

# Module-level logger shared by the task functions in this file.
log = logging.getLogger(__name__)
|
|
|
|
@contextlib.contextmanager
def thrashes_rbd_bench_on_persistent_cache(ctx, config):
    """
    Repeatedly crash ``rbd bench`` on top of a persistent write log cache.

    Each iteration starts a background ``rbd bench`` write workload against
    the image, lets it run for a random amount of time, kills every ``rbd``
    process on the remote with ``killall -9`` and then checks through
    ``rbd status`` that a present, non-empty, dirty cache was left behind
    and that the reported cache file exists.  All iterations run *before*
    the context manager yields; nothing but a log line happens on exit.

    :param ctx: run context; only ``ctx.cluster`` is used here.
    :param config: mapping of client role to per-client settings (a dict or
                   None).  Only the first entry is used.  Recognised
                   settings are ``image_name`` (default ``'testimage'``)
                   and ``num_iterations`` (default DEFAULT_NUM_ITERATIONS).
    :raises AssertionError: if, after the crash, ``rbd status`` still lists
                            watchers or does not report the cache as
                            present, non-empty and dirty.
    """
    log.info("thrashes rbd bench on persistent write log cache")

    # Only the first client entry of the config is exercised.
    client, client_config = list(config.items())[0]
    (remote,) = ctx.cluster.only(client).remotes.keys()
    if client_config is None:
        client_config = {}
    image_name = client_config.get('image_name', 'testimage')
    num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS)

    # Values the 'persistent_cache' section of `rbd status` must report once
    # the writer has been killed, checked in this order.
    expected_cache_flags = (
        ('present', True),
        ('empty', False),
        ('clean', False),
    )

    for _ in range(num_iterations):
        log.info("start rbd bench")
        # rbd bench could not specify the run time so set a large enough test size.
        # wait=False: the workload keeps running in the background until it
        # is killed below.
        remote.run(
            args=[
                'rbd', 'bench',
                '--io-type', 'write',
                '--io-pattern', random.choice(IO_PATTERNS),
                '--io-size', random.choice(IO_SIZES),
                '--io-total', '100G',
                image_name,
            ],
            wait=False,
        )
        # Wait a few seconds for the rbd bench process to run
        # and complete the pwl cache initialization
        time.sleep(10)
        log.info("dump cache state when rbd bench running.")
        # Output is not inspected here; the command is run only so that the
        # state while the workload is active gets dumped.
        remote.sh(['rbd', 'status', image_name, '--format=json'])
        log.info("sleep...")
        time.sleep(random.randint(10, 60))
        log.info("rbd bench crash.")
        # check_status=False: the exit status of killall is deliberately
        # ignored.
        remote.run(
            args=[
                'killall', '-9', 'rbd',
            ],
            check_status=False,
        )
        log.info("wait for watch timeout.")
        time.sleep(40)
        log.info("check cache state after crash.")
        out = remote.sh(['rbd', 'status', image_name, '--format=json'])
        rbd_status = json.loads(out)

        watchers = rbd_status['watchers']
        assert len(watchers) == 0, \
            "image %r still has watchers after crash: %r" % (
                image_name, watchers)

        cache_state = rbd_status['persistent_cache']
        for flag, expected in expected_cache_flags:
            assert cache_state[flag] == expected, \
                "persistent_cache[%r] is %r, expected %r (status: %r)" % (
                    flag, cache_state[flag], expected, cache_state)

        log.info("check dirty cache file.")
        # `test -e` exits non-zero when the path is missing; remote.run is
        # left with its default status checking here.
        remote.run(
            args=[
                'test', '-e', cache_state['path'],
            ]
        )
    try:
        yield
    finally:
        log.info("cleanup")
|
|
|
|
@contextlib.contextmanager
def task(ctx, config):
    """
    Entry point of the persistent write log cache recovery task.

    Validates that ``config`` is a dictionary, passes it through
    ``teuthology.replace_all_with_clients`` (presumably expanding an "all"
    entry into per-client entries -- confirm against teuthology.misc), and
    runs ``thrashes_rbd_bench_on_persistent_cache`` under
    ``contextutil.nested`` before yielding to the enclosed tasks.

    :param ctx: run context; ``ctx.cluster`` is used for client expansion.
    :param config: task configuration; must be a dict.
    :raises AssertionError: if ``config`` is not a dict.
    """
    assert isinstance(config, dict), \
        "task rbd_pwl_cache_recovery only supports a dictionary for configuration"

    per_client_config = teuthology.replace_all_with_clients(ctx.cluster, config)

    def crash_recovery_manager():
        # Invoked by contextutil.nested to obtain the context manager.
        return thrashes_rbd_bench_on_persistent_cache(
            ctx=ctx, config=per_client_config)

    with contextutil.nested(crash_recovery_manager):
        yield
|