ceph/qa/standalone/osd/osd-bluefs-volume-ops.sh
Gabriel BenHanokh a39b1f3cf7 tools/ceph-bluestore-tool: Fix bluefs-bdev-expand command
Update the allocation file when we expand a device.
Add the expanded space to the allocator and then force an update of the allocation file.

There is also a new standalone test case for expand

Fixes: https://tracker.ceph.com/issues/53699
Signed-off-by: Gabriel Benhanokh <gbenhano@redhat.com>
2022-01-12 18:07:59 +02:00

498 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
# On FreeBSD the script stops here and reports success without running
# any test.
if [ $(uname) = FreeBSD ]; then
    exit 0
fi
#
# Run test functions one after another, each one bracketed by setup and
# teardown of the same directory.
#
# $1    - directory passed to setup, to each test function and to teardown
# $2... - names of the functions to run; when none are given, every name
#         that `set` lists in the form "TEST_<digits/lowercase/_> ..." is
#         run (this is how shell functions show up in the listing)
#
# Returns 1 as soon as setup, a test function or teardown fails; the
# remaining functions are then skipped.
#
function run() {
    local dir=$1
    shift

    # Explicit function list if present, discovered TEST_* names otherwise.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}

    for func in $funcs ; do
        if ! setup $dir ; then
            return 1
        fi
        if ! $func $dir ; then
            return 1
        fi
        if ! teardown $dir ; then
            return 1
        fi
    done
}
#
# End-to-end exercise of ceph-bluestore-tool volume operations on four OSDs
# that are created with separate slow (block), DB and WAL volumes.
#
# The scenario consists of three offline rounds.  Before each round all four
# OSDs are killed and marked down; after each round they are re-activated and
# written to again with rados bench:
#   1. fsck, grow each block file (truncate + bluefs-bdev-expand), then move
#      BlueFS data between the existing volumes with bluefs-bdev-migrate.
#   2. attach new DB/WAL volumes (bluefs-bdev-new-db / bluefs-bdev-new-wal).
#   3. migrate to target files freshly created with dd.
# Nearly every tool invocation is followed by an fsck of the same OSD.
#
# $1 - test directory; OSD <n> is addressed as $dir/<n>, its pid file is
#      $dir/osd.<n>.pid.
#
# Returns 1 on the first failing step.  The two migrations marked
# "negative case" are expected to fail; the test returns 1 if they succeed.
#
function TEST_bluestore() {
local dir=$1
local flimit=$(ulimit -n)
# Only a warning is printed; a low limit does not abort the test.
if [ $flimit -lt 1536 ]; then
echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
fi
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
# Requested volume sizes: 2 GiB block, 1 GiB DB, 512 MiB WAL; fsck is run on
# every mount.
CEPH_ARGS+="--bluestore_block_size=2147483648 "
CEPH_ARGS+="--bluestore_block_db_create=true "
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
CEPH_ARGS+="--bluestore_block_wal_create=true "
CEPH_ARGS+="--bluestore_fsck_on_mount=true "
# Bring up one mon, one mgr and four OSDs, remembering each OSD's pid so it
# can be killed later.
run_mon $dir a || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
osd_pid0=$(cat $dir/osd.0.pid)
run_osd $dir 1 || return 1
osd_pid1=$(cat $dir/osd.1.pid)
run_osd $dir 2 || return 1
osd_pid2=$(cat $dir/osd.2.pid)
run_osd $dir 3 || return 1
osd_pid3=$(cat $dir/osd.3.pid)
sleep 5
create_pool foo 16
# write some objects
# (the bench exit status is not checked: the "|| return 1" is commented out)
timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
echo "after bench"
# kill
# Each loop keeps signalling until kill itself fails (presumably because the
# process no longer exists), then the OSD is marked down in the cluster.
while kill $osd_pid0; do sleep 1 ; done
ceph osd down 0
while kill $osd_pid1; do sleep 1 ; done
ceph osd down 1
while kill $osd_pid2; do sleep 1 ; done
ceph osd down 2
while kill $osd_pid3; do sleep 1 ; done
ceph osd down 3
# ---- Round 1: expand the slow devices, then migrate between volumes ----
# expand slow devices
ceph-bluestore-tool --path $dir/0 fsck || return 1
ceph-bluestore-tool --path $dir/1 fsck || return 1
ceph-bluestore-tool --path $dir/2 fsck || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
# Each OSD gets a different new block-file size (exactly 4GB, and three
# sizes slightly above or below it) before bluefs-bdev-expand is run.
truncate $dir/0/block -s 4294967296 # 4GB
ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1
truncate $dir/1/block -s 4311744512 # 4GB + 16MB
ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1
truncate $dir/2/block -s 4295099392 # 4GB + 129KB
ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1
truncate $dir/3/block -s 4293918720 # 4GB - 1MB
ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1
# slow, DB, WAL -> slow, DB
# (all four OSDs are fsck'ed after the expand; the migration itself is done
# on OSD 0 only, moving block.wal into block.db)
ceph-bluestore-tool --path $dir/0 fsck || return 1
ceph-bluestore-tool --path $dir/1 fsck || return 1
ceph-bluestore-tool --path $dir/2 fsck || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes
ceph-bluestore-tool --path $dir/0 \
--devs-source $dir/0/block.wal \
--dev-target $dir/0/block.db \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/0 fsck || return 1
# slow, DB, WAL -> slow, WAL
ceph-bluestore-tool --path $dir/1 \
--devs-source $dir/1/block.db \
--dev-target $dir/1/block \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/1 fsck || return 1
# slow, DB, WAL -> slow
ceph-bluestore-tool --path $dir/2 \
--devs-source $dir/2/block.wal \
--devs-source $dir/2/block.db \
--dev-target $dir/2/block \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/2 fsck || return 1
# slow, DB, WAL -> slow, WAL (negative case)
ceph-bluestore-tool --path $dir/3 \
--devs-source $dir/3/block.db \
--dev-target $dir/3/block.wal \
--command bluefs-bdev-migrate
# Migration to WAL is unsupported
# so a zero exit status from the command above is a test failure.
if [ $? -eq 0 ]; then
return 1
fi
# The store must still pass fsck after the rejected migration.
ceph-bluestore-tool --path $dir/3 fsck || return 1
# slow, DB, WAL -> slow, DB (WAL to slow then slow to DB)
ceph-bluestore-tool --path $dir/3 \
--devs-source $dir/3/block.wal \
--dev-target $dir/3/block \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
ceph-bluestore-tool --path $dir/3 \
--devs-source $dir/3/block \
--dev-target $dir/3/block.db \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
# Restart all OSDs on the modified stores and refresh the saved pids.
activate_osd $dir 0 || return 1
osd_pid0=$(cat $dir/osd.0.pid)
activate_osd $dir 1 || return 1
osd_pid1=$(cat $dir/osd.1.pid)
activate_osd $dir 2 || return 1
osd_pid2=$(cat $dir/osd.2.pid)
activate_osd $dir 3 || return 1
osd_pid3=$(cat $dir/osd.3.pid)
wait_for_clean || return 1
# write some objects
timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
# kill
while kill $osd_pid0; do sleep 1 ; done
ceph osd down 0
while kill $osd_pid1; do sleep 1 ; done
ceph osd down 1
while kill $osd_pid2; do sleep 1 ; done
ceph osd down 2
while kill $osd_pid3; do sleep 1 ; done
ceph osd down 3
# ---- Round 2: attach new DB/WAL volumes ----
# slow, DB -> slow, DB, WAL
ceph-bluestore-tool --path $dir/0 fsck || return 1
# 512 x 1M zero-filled file used as the new WAL target.
dd if=/dev/zero of=$dir/0/wal count=512 bs=1M
ceph-bluestore-tool --path $dir/0 \
--dev-target $dir/0/wal \
--command bluefs-bdev-new-wal || return 1
ceph-bluestore-tool --path $dir/0 fsck || return 1
# slow, WAL -> slow, DB, WAL
ceph-bluestore-tool --path $dir/1 fsck || return 1
# 1024 x 1M zero-filled file used as the new DB target.
dd if=/dev/zero of=$dir/1/db count=1024 bs=1M
ceph-bluestore-tool --path $dir/1 \
--dev-target $dir/1/db \
--command bluefs-bdev-new-db || return 1
# Once the DB volume exists, move BlueFS data from the slow device onto it.
ceph-bluestore-tool --path $dir/1 \
--devs-source $dir/1/block \
--dev-target $dir/1/block.db \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/1 fsck || return 1
# slow -> slow, DB, WAL
ceph-bluestore-tool --path $dir/2 fsck || return 1
# No --dev-target is given for the next two commands.
# NOTE(review): presumably the tool then creates the volume itself using the
# bluestore_block_{db,wal}_* settings from CEPH_ARGS - confirm.
ceph-bluestore-tool --path $dir/2 \
--command bluefs-bdev-new-db || return 1
ceph-bluestore-tool --path $dir/2 \
--command bluefs-bdev-new-wal || return 1
ceph-bluestore-tool --path $dir/2 \
--devs-source $dir/2/block \
--dev-target $dir/2/block.db \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/2 fsck || return 1
# slow, DB -> slow, WAL
# (a WAL is added first, then the DB content is migrated to the slow device)
ceph-bluestore-tool --path $dir/3 fsck || return 1
ceph-bluestore-tool --path $dir/3 \
--command bluefs-bdev-new-wal || return 1
ceph-bluestore-tool --path $dir/3 \
--devs-source $dir/3/block.db \
--dev-target $dir/3/block \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
# Restart all OSDs again (no wait_for_clean in this round).
activate_osd $dir 0 || return 1
osd_pid0=$(cat $dir/osd.0.pid)
activate_osd $dir 1 || return 1
osd_pid1=$(cat $dir/osd.1.pid)
activate_osd $dir 2 || return 1
osd_pid2=$(cat $dir/osd.2.pid)
activate_osd $dir 3 || return 1
osd_pid3=$(cat $dir/osd.3.pid)
# write some objects
timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
# kill
while kill $osd_pid0; do sleep 1 ; done
ceph osd down 0
while kill $osd_pid1; do sleep 1 ; done
ceph osd down 1
while kill $osd_pid2; do sleep 1 ; done
ceph osd down 2
while kill $osd_pid3; do sleep 1 ; done
ceph osd down 3
# ---- Round 3: migrate to brand-new target files ----
# slow, DB1, WAL -> slow, DB2, WAL
ceph-bluestore-tool --path $dir/0 fsck || return 1
dd if=/dev/zero of=$dir/0/db2 count=1024 bs=1M
ceph-bluestore-tool --path $dir/0 \
--devs-source $dir/0/block.db \
--dev-target $dir/0/db2 \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/0 fsck || return 1
# slow, DB, WAL1 -> slow, DB, WAL2
dd if=/dev/zero of=$dir/0/wal2 count=512 bs=1M
ceph-bluestore-tool --path $dir/0 \
--devs-source $dir/0/block.wal \
--dev-target $dir/0/wal2 \
--command bluefs-bdev-migrate || return 1
# Remove the WAL file that was created with dd in round 2; the WAL has just
# been migrated to wal2.
rm -rf $dir/0/wal
ceph-bluestore-tool --path $dir/0 fsck || return 1
# slow, DB + WAL -> slow, DB2 -> slow
ceph-bluestore-tool --path $dir/1 fsck || return 1
dd if=/dev/zero of=$dir/1/db2 count=1024 bs=1M
ceph-bluestore-tool --path $dir/1 \
--devs-source $dir/1/block.db \
--devs-source $dir/1/block.wal \
--dev-target $dir/1/db2 \
--command bluefs-bdev-migrate || return 1
# Remove the DB file that was created with dd in round 2.
rm -rf $dir/1/db
ceph-bluestore-tool --path $dir/1 fsck || return 1
# Second hop: fold the (new) DB volume back into the slow device.
ceph-bluestore-tool --path $dir/1 \
--devs-source $dir/1/block.db \
--dev-target $dir/1/block \
--command bluefs-bdev-migrate || return 1
rm -rf $dir/1/db2
ceph-bluestore-tool --path $dir/1 fsck || return 1
# slow -> slow, DB (negative case)
# (this fsck goes through ceph-objectstore-tool rather than
# ceph-bluestore-tool)
ceph-objectstore-tool --type bluestore --data-path $dir/2 \
--op fsck --no-mon-config || return 1
dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M
ceph-bluestore-tool --path $dir/2 \
--devs-source $dir/2/block \
--dev-target $dir/2/db2 \
--command bluefs-bdev-migrate
# Migration from slow-only to new device is unsupported
# so a zero exit status from the command above is a test failure.
if [ $? -eq 0 ]; then
return 1
fi
ceph-bluestore-tool --path $dir/2 fsck || return 1
# slow + DB + WAL -> slow, DB2
# (the db2 target file is re-created with dd before being reused)
dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M
ceph-bluestore-tool --path $dir/2 \
--devs-source $dir/2/block \
--devs-source $dir/2/block.db \
--devs-source $dir/2/block.wal \
--dev-target $dir/2/db2 \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/2 fsck || return 1
# slow + WAL -> slow2, WAL2
dd if=/dev/zero of=$dir/3/wal2 count=1024 bs=1M
ceph-bluestore-tool --path $dir/3 \
--devs-source $dir/3/block \
--devs-source $dir/3/block.wal \
--dev-target $dir/3/wal2 \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
# Final restart: all OSDs must come up, accept writes and reach a clean state.
activate_osd $dir 0 || return 1
osd_pid0=$(cat $dir/osd.0.pid)
activate_osd $dir 1 || return 1
osd_pid1=$(cat $dir/osd.1.pid)
activate_osd $dir 2 || return 1
osd_pid2=$(cat $dir/osd.2.pid)
activate_osd $dir 3 || return 1
osd_pid3=$(cat $dir/osd.3.pid)
# write some objects
timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
wait_for_clean || return 1
}
#
# Verify that BlueFS data can be migrated from the DB volume to the main
# (slow) device after BlueFS has started using the slow device.
#
# A single OSD is created with a 4 GiB block file and a 1 GiB DB volume but
# no WAL (bluestore_block_wal_create=false).  Omap writes are repeated, at
# most 6 times, until the bluefs perf counter slow_used_bytes is non-zero.
# The OSD is then stopped, block.db is migrated into block with
# bluefs-bdev-migrate, and the store must pass fsck and become clean again
# after re-activation.
#
# $1 - test directory; the OSD is addressed as $dir/0.
#
# Returns 1 if any checked step fails or if slow_used_bytes never became
# positive.
#
function TEST_bluestore2() {
    local dir=$1

    local flimit=$(ulimit -n)
    # Only a warning is printed; a low limit does not abort the test.
    if [ $flimit -lt 1536 ]; then
        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
    fi
    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--bluestore_block_size=4294967296 "
    CEPH_ARGS+="--bluestore_block_db_create=true "
    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
    CEPH_ARGS+="--bluestore_block_wal_create=false "
    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
    CEPH_ARGS+="--osd_pool_default_size=1 "
    CEPH_ARGS+="--osd_pool_default_min_size=1 "
    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    osd_pid0=$(cat $dir/osd.0.pid)

    sleep 5
    create_pool foo 16

    # The counter has to be set with a plain assignment: "retry = 0" (with
    # spaces) would run a command named "retry" and leave the variable
    # untouched.  Keeping it local also protects the loop from a stale
    # "retry" value inherited from the caller.
    local retry=0
    local db_used spilled_over
    while [[ $retry -le 5 ]]; do
        # write some objects
        # (the bench exit status is not checked)
        timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1

        #give RocksDB some time to cooldown and put files to slow level(s)
        sleep 10

        # db_used is not evaluated below; only the slow-device usage decides
        # whether another round of writes is needed.
        db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" )
        spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" )
        ((retry+=1))
        # Stop as soon as the counter is anything but 0 (a non-numeric value
        # also ends the loop and is rejected by the check below).
        test "$spilled_over" -eq 0 || break
    done
    test "$spilled_over" -gt 0 || return 1

    # Keep signalling until kill itself fails (presumably: process gone).
    while kill $osd_pid0; do sleep 1 ; done
    ceph osd down 0

    # Move everything BlueFS keeps on the DB volume to the slow device.
    ceph-bluestore-tool --path $dir/0 \
        --devs-source $dir/0/block.db \
        --dev-target $dir/0/block \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path $dir/0 \
        --command bluefs-bdev-sizes || return 1

    ceph-bluestore-tool --path $dir/0 \
        --command fsck || return 1

    activate_osd $dir 0 || return 1
    osd_pid0=$(cat $dir/osd.0.pid)

    wait_for_clean || return 1
}
#
# Verify bluefs-bdev-expand on the slow device of a single OSD, including
# the consistency of the allocation file afterwards (qfsck).
#
# Steps: start one OSD (block + DB, no WAL), write objects, record the
# bluefs slow_total_bytes counter and the free bytes reported for
# BDEV_SLOW, stop the OSD, run allocmap and fsck, resize the block file to
# $requested_space, run bluefs-bdev-expand, fsck and qfsck, restart the OSD
# and read both values again.  The test then requires that
#   - slow_total_bytes equals the requested size, and
#   - the added total space minus the added free space is at most 128 KiB.
# Finally the OSD is stopped once more and qfsck is repeated.
#
# $1 - test directory; the OSD is addressed as $dir/0 and the tool log goes
#      to $dir/bluestore_tool.log.
#
# Returns 1 if any checked step or either of the two size checks fails.
#
function TEST_bluestore_expand() {
    local dir=$1

    local flimit=$(ulimit -n)
    # Only a warning is printed; a low limit does not abort the test.
    if [ $flimit -lt 1536 ]; then
        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
    fi
    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--bluestore_block_size=4294967296 "
    CEPH_ARGS+="--bluestore_block_db_create=true "
    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
    CEPH_ARGS+="--bluestore_block_wal_create=false "
    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
    CEPH_ARGS+="--osd_pool_default_size=1 "
    CEPH_ARGS+="--osd_pool_default_min_size=1 "
    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    osd_pid0=$(cat $dir/osd.0.pid)

    sleep 5
    create_pool foo 16

    # write some objects
    # (the bench exit status is not checked)
    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
    sleep 5

    local total_space_before free_space_before
    local total_space_after free_space_after
    local requested_space total_space_added free_space_added new_used_space

    total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
    # Take the "free" value from the two lines following the BDEV_SLOW entry
    # and strip the surrounding punctuation.
    free_space_before=$( ceph tell osd.0 bluestore bluefs device info | grep -A 2 "BDEV_SLOW" | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2 )

    # kill
    # Keep signalling until kill itself fails (presumably: process gone).
    while kill $osd_pid0; do sleep 1 ; done
    ceph osd down 0

    # destage allocation to file before expand (in case fast-shutdown skipped that step)
    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 allocmap || return 1

    # expand slow devices
    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1

    requested_space=4294967296 # 4GB
    truncate $dir/0/block -s $requested_space
    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-expand || return 1

    # the store must be consistent after the expand
    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1

    # compare allocation-file with RocksDB state
    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1

    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-sizes

    activate_osd $dir 0 || return 1
    osd_pid0=$(cat $dir/osd.0.pid)

    wait_for_clean || return 1

    total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
    free_space_after=$( ceph tell osd.0 bluestore bluefs device info | grep -A 2 "BDEV_SLOW" | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2 )

    # "[" is a command and needs spaces around its arguments.  Without them
    # the shell looked for a command named "[<number>", which failed, so this
    # check could never trigger.
    # NOTE(review): this comparison is effective for the first time; if it
    # fails on a real cluster, confirm that slow_total_bytes is expected to
    # equal the raw size of the block file.
    if [ "$total_space_after" != "$requested_space" ]; then
        echo "total_space_after = $total_space_after"
        echo "requested_space = $requested_space"
        return 1;
    fi

    total_space_added=$((total_space_after - total_space_before))
    free_space_added=$((free_space_after - free_space_before))

    # Space that became part of the device but is not reported as free.
    new_used_space=$((total_space_added - free_space_added))
    echo $new_used_space
    # allow upto 128KB to be consumed
    if [ $new_used_space -gt 131072 ]; then
        echo "total_space_added = $total_space_added"
        echo "free_space_added = $free_space_added"
        return 1;
    fi

    # kill
    while kill $osd_pid0; do sleep 1 ; done
    ceph osd down 0

    # The allocation file must still match after the OSD ran on the expanded
    # device.
    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1
}
# Entry point: main is not defined in this file (presumably it is provided
# by the sourced ceph-helpers.sh - confirm).  It is given this test's name
# followed by the script's own command-line arguments.
main osd-bluefs-volume-ops "$@"
# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bluefs-volume-ops.sh"
# End: