From 0eab8de3c017d8318bd6c846991bb3f7c51fa97d Mon Sep 17 00:00:00 2001
From: Yin Congmin
Date: Fri, 7 Jan 2022 15:03:44 +0800
Subject: [PATCH 1/2] qa/tasks: add thrash test for persistent write log cache

add thrash test for persistent write log cache. run rbd bench on
persistent write log cache, thrashes rbd bench, test the recovery
function of persistent write log cache.

Signed-off-by: Yin Congmin
---
Note: restored from a whitespace-collapsed copy and revised in review
(docstrings, assertion messages, per-iteration logging).  The blob index
line of the new Python file was dropped because the original hash no
longer matches the revised content.

 .../pwl-cache/home/7-workloads/recovery.yaml  |   7 +-
 qa/tasks/persistent_write_log_cache_thrash.py | 123 ++++++++++++++++++
 2 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 qa/tasks/persistent_write_log_cache_thrash.py

diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml
index 63a0c9dcf25..a991308c691 100644
--- a/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml
+++ b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml
@@ -2,9 +2,8 @@ tasks:
 - rbd.create_image:
     client.0:
       image_name: testimage
-      image_size: 10
+      image_size: 10240
       image_format: 2
-- exec:
+- persistent_write_log_cache_thrash:
     client.0:
-      - "timeout 10s rbd bench --io-pattern rand --io-type write testimage || true"
-      - "rbd bench --io-type write --io-pattern rand --io-total 32M testimage"
+      image_name: testimage
diff --git a/qa/tasks/persistent_write_log_cache_thrash.py b/qa/tasks/persistent_write_log_cache_thrash.py
new file mode 100644
--- /dev/null
+++ b/qa/tasks/persistent_write_log_cache_thrash.py
@@ -0,0 +1,123 @@
+"""
+persistent write log cache thrash task
+"""
+import contextlib
+import logging
+import random
+import json
+import time
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+
+DEFAULT_NUM_ITERATIONS = 20
+IO_PATTERNS = ("full-seq", "rand")
+IO_SIZES = ('4K', '16K', '128K', '1024K')
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def thrashes_rbd_bench_on_persistent_cache(ctx, config):
+    """
+    thrashes rbd bench on persistent write log cache.
+    It can test recovery feature of persistent write log cache.
+
+    Each iteration starts a large rbd bench write workload in the
+    background, kills every rbd process on the client with killall -9,
+    and then asserts that rbd status reports no watchers and a cache
+    that is present, not empty and not clean, and that the cache file
+    exists.  All iterations run before the yield.
+
+    :param ctx: teuthology context; ctx.cluster is used to find the remote
+    :param config: dict of client role -> settings (or None).  Only the
+                   first entry is used.  Settings are image_name
+                   (default 'testimage') and num_iterations (default
+                   DEFAULT_NUM_ITERATIONS).
+    """
+    log.info("thrashes rbd bench on persistent write log cache")
+
+    client, client_config = list(config.items())[0]
+    (remote,) = ctx.cluster.only(client).remotes.keys()
+    client_config = client_config if client_config is not None else dict()
+    image_name = client_config.get('image_name', 'testimage')
+    num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS)
+
+    for iteration in range(1, num_iterations + 1):
+        log.info("iteration %d of %d", iteration, num_iterations)
+        log.info("start rbd bench")
+        # rbd bench could not specify the run time so set a large enough test size.
+        # NOTE(review): the handle returned for this background process is
+        # discarded, so the killed bench is never waited on; confirm that
+        # this is acceptable.
+        remote.run(
+            args=[
+                'rbd', 'bench',
+                '--io-type', 'write',
+                '--io-pattern', random.choice(IO_PATTERNS),
+                '--io-size', random.choice(IO_SIZES),
+                '--io-total', '100G',
+                image_name,
+            ],
+            wait=False,
+        )
+        # Wait a few seconds for the rbd bench process to run
+        # and complete the pwl cache initialization
+        time.sleep(10)
+        log.info("dump cache state when rbd bench running.")
+        remote.sh(['rbd', 'status', image_name, '--format=json'])
+        log.info("sleep...")
+        time.sleep(random.randint(10, 60))
+        log.info("rbd bench crash.")
+        # check_status=False: presumably so a non-zero killall exit is ignored
+        remote.run(
+            args=[
+                'killall', '-9', 'rbd',
+            ],
+            check_status=False,
+        )
+        log.info("wait for watch timeout.")
+        time.sleep(40)
+        log.info("check cache state after crash.")
+        out = remote.sh(['rbd', 'status', image_name, '--format=json'])
+        rbd_status = json.loads(out)
+        # Truthiness checks instead of "== True/False" (PEP 8); each message
+        # carries the offending state so a failure is diagnosable from the log.
+        watchers = rbd_status['watchers']
+        assert not watchers, \
+            "iteration %d: watchers remain: %s" % (iteration, watchers)
+        cache = rbd_status['persistent_cache']
+        assert cache['present'], \
+            "iteration %d: cache not present: %s" % (iteration, cache)
+        assert not cache['empty'], \
+            "iteration %d: cache is empty: %s" % (iteration, cache)
+        assert not cache['clean'], \
+            "iteration %d: cache is clean: %s" % (iteration, cache)
+        log.info("check dirty cache file.")
+        remote.run(
+            args=[
+                'test', '-e', cache['path'],
+            ]
+        )
+    try:
+        yield
+    finally:
+        log.info("cleanup")
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    This is task for testing persistent write log cache thrash.
+
+    config maps a client role to optional image_name / num_iterations.
+    """
+    assert isinstance(config, dict), \
+        "task persistent_write_log_cache_thrash only supports a dictionary for configuration"
+
+    managers = []
+    config = teuthology.replace_all_with_clients(ctx.cluster, config)
+    managers.append(
+        lambda: thrashes_rbd_bench_on_persistent_cache(ctx=ctx, config=config)
+    )
+
+    with contextutil.nested(*managers):
+        yield

From 2de0574382e2c1c63f20745d6870ac7f82b27b9f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Sat, 16 Jul 2022 08:54:38 +0200
Subject: [PATCH 2/2] qa/tasks: rename persistent write log cache thrash task

It doesn't really thrash anything, just repeatedly restarts the workload
on top of a dirty cache file.  rbd_pwl_cache_recovery is more on point
and gets covered by existing CODEOWNERS.

Signed-off-by: Ilya Dryomov
---
Note: hunk offsets and context follow the revised 1/2; the similarity
and blob index lines of the renamed file were dropped because the
original values no longer match.

 qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml      | 2 +-
 ..._write_log_cache_thrash.py => rbd_pwl_cache_recovery.py} | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename qa/tasks/{persistent_write_log_cache_thrash.py => rbd_pwl_cache_recovery.py}

diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml
index a991308c691..3017beb22fb 100644
--- a/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml
+++ b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml
@@ -4,6 +4,6 @@ tasks:
       image_name: testimage
       image_size: 10240
       image_format: 2
-- persistent_write_log_cache_thrash:
+- rbd_pwl_cache_recovery:
     client.0:
       image_name: testimage
diff --git a/qa/tasks/persistent_write_log_cache_thrash.py b/qa/tasks/rbd_pwl_cache_recovery.py
rename from qa/tasks/persistent_write_log_cache_thrash.py
rename to qa/tasks/rbd_pwl_cache_recovery.py
--- a/qa/tasks/persistent_write_log_cache_thrash.py
+++ b/qa/tasks/rbd_pwl_cache_recovery.py
@@ -1,5 +1,5 @@
 """
-persistent write log cache thrash task
+persistent write log cache recovery task
 """
 import contextlib
 import logging
@@ -106,12 +106,12 @@ def thrashes_rbd_bench_on_persistent_cache(ctx, config):
 @contextlib.contextmanager
 def task(ctx, config):
     """
-    This is task for testing persistent write log cache thrash.
+    This is task for testing persistent write log cache recovery.
 
     config maps a client role to optional image_name / num_iterations.
     """
     assert isinstance(config, dict), \
-        "task persistent_write_log_cache_thrash only supports a dictionary for configuration"
+        "task rbd_pwl_cache_recovery only supports a dictionary for configuration"
 
     managers = []
     config = teuthology.replace_all_with_clients(ctx.cluster, config)