From 00651cfac230a759389a595b41bb220474f5f2a7 Mon Sep 17 00:00:00 2001
From: Vallari Agrawal
Date: Tue, 20 Feb 2024 13:14:32 +0530
Subject: [PATCH] qa/suite/rbd/nvmeof: Deploy multiple gateways and namespaces

1. Deploy 2 gateways on different nodes, then check for multipath.
   To add another gateway, only the "roles" list in the job yaml
   needs to change.
2. Create "n" nvmeof namespaces, with "n" configured by the
   'namespaces_count' option.
3. Rename qa/suites/rbd/nvmeof/cluster/fixed-3.yaml to fixed-4.yaml,
   which now contains 2 gateways and 2 initiators.

Signed-off-by: Vallari Agrawal
---
 .../cluster/{fixed-3.yaml => fixed-4.yaml}     |  2 +
 qa/suites/rbd/nvmeof/conf                      |  1 +
 .../nvmeof/workloads/nvmeof_initiator.yaml     | 32 ++++++---
 qa/tasks/nvmeof.py                             | 70 +++++++++----------
 qa/workunits/rbd/nvmeof_basic_tests.sh         | 16 +++--
 qa/workunits/rbd/nvmeof_fio_test.sh            | 53 ++++++++++++--
 qa/workunits/rbd/nvmeof_initiator.sh           | 27 -------
 qa/workunits/rbd/nvmeof_setup_subsystem.sh     | 38 ++++++++++
 8 files changed, 152 insertions(+), 87 deletions(-)
 rename qa/suites/rbd/nvmeof/cluster/{fixed-3.yaml => fixed-4.yaml} (81%)
 create mode 120000 qa/suites/rbd/nvmeof/conf
 delete mode 100755 qa/workunits/rbd/nvmeof_initiator.sh
 create mode 100755 qa/workunits/rbd/nvmeof_setup_subsystem.sh

diff --git a/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml b/qa/suites/rbd/nvmeof/cluster/fixed-4.yaml
similarity index 81%
rename from qa/suites/rbd/nvmeof/cluster/fixed-3.yaml
rename to qa/suites/rbd/nvmeof/cluster/fixed-4.yaml
index f417079e31a..d57e9fc47be 100644
--- a/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml
+++ b/qa/suites/rbd/nvmeof/cluster/fixed-4.yaml
@@ -12,4 +12,6 @@ roles:
   - osd.3
   - osd.4
   - client.1
+  - ceph.nvmeof.nvmeof.b
 - - client.2
+- - client.3
diff --git a/qa/suites/rbd/nvmeof/conf b/qa/suites/rbd/nvmeof/conf
new file mode 120000
index 00000000000..4bc0fe86c63
--- /dev/null
+++ b/qa/suites/rbd/nvmeof/conf
@@ -0,0 +1 @@
+.qa/rbd/conf
\ No newline at end of file
diff --git a/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml b/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml
index bbb9b0ab5f2..5a4143ffa91 100644
--- a/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml
+++ b/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml
@@ -1,15 +1,13 @@
 tasks:
 - nvmeof:
     client: client.0
-    version: latest # "default" uses packaged version; change to test specific nvmeof images, example "latest"
+    version: default # "default" is the image cephadm defaults to; change to test specific nvmeof images, e.g. "latest"
     rbd:
-      pool_name: mypool
-      image_name: myimage
+      pool_name: mypool
+      image_name_prefix: myimage
     gateway_config:
-      source: host.a
-      target: client.2
-      vars:
-        cli_version: latest
+      namespaces_count: 128
+      cli_version: latest

 - cephadm.wait_for_service:
     service: nvmeof.mypool
@@ -18,10 +16,22 @@
     no_coverage_and_limits: true
     clients:
       client.2:
-        - rbd/nvmeof_initiator.sh
-        - rbd/nvmeof_basic_tests.sh
-        - rbd/nvmeof_fio_test.sh
+        - rbd/nvmeof_setup_subsystem.sh
+    env:
+      RBD_POOL: mypool
+      RBD_IMAGE_PREFIX: myimage
+
+- workunit:
+    no_coverage_and_limits: true
+    timeout: 30m
+    clients:
+      client.2:
+        - rbd/nvmeof_basic_tests.sh
+        - rbd/nvmeof_fio_test.sh --start_ns 1 --end_ns 64 --rbd_iostat
+      client.3:
+        - rbd/nvmeof_basic_tests.sh
+        - rbd/nvmeof_fio_test.sh --start_ns 65 --end_ns 128
     env:
       RBD_POOL: mypool
-      RBD_IMAGE: myimage
       IOSTAT_INTERVAL: '10'
+      RUNTIME: '600'
diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py
index b75d00d93ae..7e9afe78946 100644
--- a/qa/tasks/nvmeof.py
+++ b/qa/tasks/nvmeof.py
@@ -20,13 +20,10 @@ class Nvmeof(Task):
             version: default
             rbd:
               pool_name: mypool
-              image_name: myimage
               rbd_size: 1024
             gateway_config:
-              source: host.a
-              target: client.2
-              vars:
-                cli_version: latest
+              namespaces_count: 10
+              cli_version: latest
     """
@@ -54,17 +51,17 @@ class Nvmeof(Task):
         rbd_config = self.config.get('rbd', {})
         self.poolname = rbd_config.get('pool_name', 'mypool')
-        self.rbd_image_name = rbd_config.get('image_name', 'myimage')
+        self.image_name_prefix = rbd_config.get('image_name_prefix', 'myimage')
         self.rbd_size = rbd_config.get('rbd_size', 1024*8)

         gateway_config = self.config.get('gateway_config', {})
-        conf_vars = gateway_config.get('vars', {})
-        self.cli_image = conf_vars.get('cli_version', 'latest')
-        self.bdev = conf_vars.get('bdev', 'mybdev')
-        self.serial = conf_vars.get('serial', 'SPDK00000000000001')
-        self.nqn = conf_vars.get('nqn', 'nqn.2016-06.io.spdk:cnode1')
-        self.port = conf_vars.get('port', '4420')
-        self.srport = conf_vars.get('srport', '5500')
+        self.namespaces_count = gateway_config.get('namespaces_count', 1)
+        self.cli_image = gateway_config.get('cli_version', 'latest')
+        self.bdev = gateway_config.get('bdev', 'mybdev')
+        self.serial = gateway_config.get('serial', 'SPDK00000000000001')
+        self.nqn = gateway_config.get('nqn', 'nqn.2016-06.io.spdk:cnode1')
+        self.port = gateway_config.get('port', '4420')
+        self.srport = gateway_config.get('srport', '5500')

     def deploy_nvmeof(self):
         """
@@ -97,7 +94,6 @@ class Nvmeof(Task):
         ])

         poolname = self.poolname
-        imagename = self.rbd_image_name

         log.info(f'[nvmeof]: ceph osd pool create {poolname}')
         _shell(self.ctx, self.cluster_name, self.remote, [
@@ -115,10 +111,13 @@
             '--placement', str(len(nodes)) + ';' + ';'.join(nodes)
         ])

-        log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}')
-        _shell(self.ctx, self.cluster_name, self.remote, [
-            'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}'
-        ])
+        log.info(f'[nvmeof]: creating {self.namespaces_count} images')
+        for i in range(1, int(self.namespaces_count) + 1):
+            imagename = self.image_name_prefix + str(i)
+            log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}')
+            _shell(self.ctx, self.cluster_name, self.remote, [
+                'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}'
+            ])

         for role, i in daemons.items():
             remote, id_ = i
@@ -134,34 +133,29 @@ class Nvmeof(Task):

     def set_gateway_cfg(self):
         log.info('[nvmeof]: running set_gateway_cfg...')
-        gateway_config = self.config.get('gateway_config', {})
-        source_host = gateway_config.get('source')
-        target_host = gateway_config.get('target')
-        if not (source_host and target_host):
-            raise ConfigError('gateway_config requires "source" and "target"')
-        remote = list(self.ctx.cluster.only(source_host).remotes.keys())[0]
-        ip_address = remote.ip_address
-        gateway_name = ""
+        ip_address = self.remote.ip_address
+        gateway_names = []
+        gateway_ips = []
         nvmeof_daemons = self.ctx.daemons.iter_daemons_of_role('nvmeof', cluster=self.cluster_name)
         for daemon in nvmeof_daemons:
-            if ip_address == daemon.remote.ip_address:
-                gateway_name = daemon.name()
+            gateway_names += [daemon.name()]
+            gateway_ips += [daemon.remote.ip_address]
         conf_data = dedent(f"""
-            NVMEOF_GATEWAY_IP_ADDRESS={ip_address}
-            NVMEOF_GATEWAY_NAME={gateway_name}
+            NVMEOF_GATEWAY_IP_ADDRESSES={",".join(gateway_ips)}
+            NVMEOF_GATEWAY_NAMES={",".join(gateway_names)}
+            NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS={ip_address}
             NVMEOF_CLI_IMAGE="quay.io/ceph/nvmeof-cli:{self.cli_image}"
-            NVMEOF_BDEV={self.bdev}
-            NVMEOF_SERIAL={self.serial}
+            NVMEOF_NAMESPACES_COUNT={self.namespaces_count}
             NVMEOF_NQN={self.nqn}
             NVMEOF_PORT={self.port}
             NVMEOF_SRPORT={self.srport}
         """)
-        target_remote = list(self.ctx.cluster.only(target_host).remotes.keys())[0]
-        target_remote.write_file(
-            path=conf_file,
-            data=conf_data,
-            sudo=True
-        )
+        for remote in self.ctx.cluster.remotes.keys():
+            remote.write_file(
+                path=conf_file,
+                data=conf_data,
+                sudo=True
+            )
         log.info("[nvmeof]: executed set_gateway_cfg successfully!")
diff --git a/qa/workunits/rbd/nvmeof_basic_tests.sh b/qa/workunits/rbd/nvmeof_basic_tests.sh
index 878e043fbeb..d92eed9b6f0 100755
--- a/qa/workunits/rbd/nvmeof_basic_tests.sh
+++ b/qa/workunits/rbd/nvmeof_basic_tests.sh
@@ -1,11 +1,16 @@
 #!/bin/bash -x

+sudo modprobe nvme-fabrics
+sudo modprobe nvme-tcp
+sudo dnf install nvme-cli -y
+sudo lsmod | grep nvme
+
 source /etc/ceph/nvmeof.env
 SPDK_CONTROLLER="SPDK bdev Controller"
 DISCOVERY_PORT="8009"

 discovery() {
-    output=$(sudo nvme discover -t tcp -a $NVMEOF_GATEWAY_IP_ADDRESS -s $DISCOVERY_PORT)
+    output=$(sudo nvme discover -t tcp -a $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS -s $DISCOVERY_PORT)
     expected_discovery_stdout="subtype: nvme subsystem"
     if ! echo "$output" | grep -q "$expected_discovery_stdout"; then
         return 1
@@ -13,7 +18,7 @@ discovery() {
 }

 connect() {
-    sudo nvme connect -t tcp --traddr $NVMEOF_GATEWAY_IP_ADDRESS -s $NVMEOF_PORT -n $NVMEOF_NQN
+    sudo nvme connect -t tcp --traddr $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS -s $NVMEOF_PORT -n $NVMEOF_NQN
     output=$(sudo nvme list)
     if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
         return 1
@@ -29,7 +34,7 @@ disconnect_all() {
 }

 connect_all() {
-    sudo nvme connect-all --traddr=$NVMEOF_GATEWAY_IP_ADDRESS --transport=tcp
+    sudo nvme connect-all --traddr=$NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --transport=tcp
     output=$(sudo nvme list)
     if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
echo "$output" | grep -q "$SPDK_CONTROLLER"; then return 1 @@ -39,7 +44,7 @@ connect_all() { list_subsys() { expected_count=$1 output=$(sudo nvme list-subsys --output-format=json) - multipath=$(echo $output | grep -c '"tcp"') + multipath=$(echo $output | grep -o '"tcp"' | wc -l) if [ "$multipath" -ne "$expected_count" ]; then return 1 fi @@ -65,7 +70,8 @@ test_run list_subsys 1 test_run disconnect_all test_run list_subsys 0 test_run connect_all -test_run list_subsys 1 +gateway_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1)) +test_run list_subsys $gateway_count echo "-------------Test Summary-------------" diff --git a/qa/workunits/rbd/nvmeof_fio_test.sh b/qa/workunits/rbd/nvmeof_fio_test.sh index bacc15e83eb..5e4bad98a64 100755 --- a/qa/workunits/rbd/nvmeof_fio_test.sh +++ b/qa/workunits/rbd/nvmeof_fio_test.sh @@ -3,11 +3,46 @@ sudo yum -y install fio sudo yum -y install sysstat +namespace_range_start= +namespace_range_end= +rbd_iostat=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --start_ns) + namespace_range_start=$2 + shift 2 + ;; + --end_ns) + namespace_range_end=$2 + shift 2 + ;; + --rbd_iostat) + rbd_iostat=true + shift + ;; + *) + exit 100 # Internal error + ;; + esac +done + fio_file=$(mktemp -t nvmeof-fio-XXXX) -drives_list=$(sudo nvme list --output-format=json | jq -r '.Devices | .[] | select(.ModelNumber == "SPDK bdev Controller") | .DevicePath') +all_drives_list=$(sudo nvme list --output-format=json | + jq -r '.Devices | sort_by(.NameSpace) | .[] | select(.ModelNumber == "SPDK bdev Controller") | .DevicePath') + +# When the script is passed --start_ns and --end_ns (example: `nvmeof_fio_test.sh --start_ns 1 --end_ns 3`), +# then fio runs on namespaces only in the defined range (which is 1 to 3 here). +# So if `nvme list` has 5 namespaces with "SPDK Controller", then fio will +# run on first 3 namespaces here. +if [ "$namespace_range_start" ] || [ "$namespace_range_end" ]; then + selected_drives=$(echo "${all_drives_list[@]}" | sed -n "${namespace_range_start},${namespace_range_end}p") +else + selected_drives="${all_drives_list[@]}" +fi + RUNTIME=${RUNTIME:-600} -# IOSTAT_INTERVAL=10 cat >> $fio_file <