Merge pull request #44217 from CongMinYin/fix-pwl-recovery-test

qa/suites/rbd/pwl-cache: ensure recovery is actually tested

Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Deepika Upadhyay <dupadhya@redhat.com>
This commit is contained in:
Ilya Dryomov 2022-07-16 11:34:55 +02:00 committed by GitHub
commit 3d710cda2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 99 additions and 4 deletions

View File

@ -2,9 +2,8 @@ tasks:
- rbd.create_image:
client.0:
image_name: testimage
image_size: 10
image_size: 10240
image_format: 2
- exec:
- rbd_pwl_cache_recovery:
client.0:
- "timeout 10s rbd bench --io-pattern rand --io-type write testimage || true"
- "rbd bench --io-type write --io-pattern rand --io-total 32M testimage"
image_name: testimage

View File

@ -0,0 +1,96 @@
"""
persistent write log cache recovery task
"""
import contextlib
import logging
import random
import json
import time
from teuthology import misc as teuthology
from teuthology import contextutil
DEFAULT_NUM_ITERATIONS = 20
IO_PATTERNS = ("full-seq", "rand")
IO_SIZES = ('4K', '16K', '128K', '1024K')
log = logging.getLogger(__name__)
@contextlib.contextmanager
def thrashes_rbd_bench_on_persistent_cache(ctx, config):
"""
thrashes rbd bench on persistent write log cache.
It can test recovery feature of persistent write log cache.
"""
log.info("thrashes rbd bench on persistent write log cache")
client, client_config = list(config.items())[0]
(remote,) = ctx.cluster.only(client).remotes.keys()
client_config = client_config if client_config is not None else dict()
image_name = client_config.get('image_name', 'testimage')
num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS)
for i in range(num_iterations):
log.info("start rbd bench")
# rbd bench could not specify the run time so set a large enough test size.
remote.run(
args=[
'rbd', 'bench',
'--io-type', 'write',
'--io-pattern', random.choice(IO_PATTERNS),
'--io-size', random.choice(IO_SIZES),
'--io-total', '100G',
image_name,
],
wait=False,
)
# Wait a few seconds for the rbd bench process to run
# and complete the pwl cache initialization
time.sleep(10)
log.info("dump cache state when rbd bench running.")
remote.sh(['rbd', 'status', image_name, '--format=json'])
log.info("sleep...")
time.sleep(random.randint(10, 60))
log.info("rbd bench crash.")
remote.run(
args=[
'killall', '-9', 'rbd',
],
check_status=False,
)
log.info("wait for watch timeout.")
time.sleep(40)
log.info("check cache state after crash.")
out = remote.sh(['rbd', 'status', image_name, '--format=json'])
rbd_status = json.loads(out)
assert len(rbd_status['watchers']) == 0
assert rbd_status['persistent_cache']['present'] == True
assert rbd_status['persistent_cache']['empty'] == False
assert rbd_status['persistent_cache']['clean'] == False
log.info("check dirty cache file.")
remote.run(
args=[
'test', '-e', rbd_status['persistent_cache']['path'],
]
)
try:
yield
finally:
log.info("cleanup")
@contextlib.contextmanager
def task(ctx, config):
"""
This is task for testing persistent write log cache recovery.
"""
assert isinstance(config, dict), \
"task rbd_pwl_cache_recovery only supports a dictionary for configuration"
managers = []
config = teuthology.replace_all_with_clients(ctx.cluster, config)
managers.append(
lambda: thrashes_rbd_bench_on_persistent_cache(ctx=ctx, config=config)
)
with contextutil.nested(*managers):
yield