mirror of
https://github.com/ceph/ceph
synced 2025-01-18 08:50:42 +00:00
a39b1f3cf7
Update allocation file when we expand-device. Add the expanded space to the allocator and then force an update to the allocation file. There is also a new standalone test case for expand. Fixes: https://tracker.ceph.com/issues/53699 Signed-off-by: Gabriel Benhanokh <gbenhano@redhat.com>
498 lines
16 KiB
Bash
Executable File
498 lines
16 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Standalone tests for BlueFS volume operations performed with
# ceph-bluestore-tool (expand, migrate, new-db, new-wal).
#
# CEPH_ROOT must name the directory that contains
# qa/standalone/ceph-helpers.sh; the shared test helpers are sourced
# from there.

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Nothing is run on FreeBSD; exit with success.
[ "$(uname)" = FreeBSD ] && exit 0
# Run the selected test functions, each inside its own setup/teardown cycle.
#
# Arguments:
#   $1    - working directory handed to setup, to the test and to teardown
#   $2... - optional names of the test functions to run; when omitted,
#           every function currently defined in the shell whose name
#           matches TEST_[0-9a-z_]* is run
# Returns:
#   0 when every stage of every selected test succeeded; 1 as soon as a
#   setup, test or teardown call fails (remaining tests are not run).
function run() {
    local dir=$1
    shift

    # `set` prints functions as "name () ..."; sed keeps only TEST_* names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func

    # $funcs is deliberately unquoted: it is a whitespace-separated name list.
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
# Exercise ceph-bluestore-tool volume operations on a four-OSD cluster:
# bluefs-bdev-expand, bluefs-bdev-migrate, bluefs-bdev-new-db and
# bluefs-bdev-new-wal, with fsck runs between the steps.
#
# The OSDs are created with separate DB and WAL devices
# (--bluestore_block_db_create / --bluestore_block_wal_create).  They are
# stopped before every round of offline tool operations and restarted
# afterwards, with a short rados bench write load in between.
#
# Arguments:
#   $1 - test directory; OSD <n> keeps its data under $1/<n>
# Returns:
#   0 on success; 1 as soon as a checked step fails, or when a migration
#   that is expected to be rejected succeeds.
function TEST_bluestore() {
    local dir=$1
    local osd_id
    local -a osd_pids=()

    local flimit
    flimit=$(ulimit -n)
    # `ulimit -n` may print "unlimited", which is not a valid integer operand
    if [[ "$flimit" != unlimited ]] && [ "$flimit" -lt 1536 ]; then
        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
    fi
    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--bluestore_block_size=2147483648 "
    CEPH_ARGS+="--bluestore_block_db_create=true "
    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
    CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
    CEPH_ARGS+="--bluestore_block_wal_create=true "
    CEPH_ARGS+="--bluestore_fsck_on_mount=true "

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for osd_id in 0 1 2 3; do
        run_osd "$dir" "$osd_id" || return 1
        osd_pids[osd_id]=$(cat "$dir/osd.$osd_id.pid")
    done

    sleep 5

    create_pool foo 16

    # write some objects (the exit status of the bench is not checked)
    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup

    echo "after bench"

    # kill: keep signalling each OSD until kill itself fails, then mark it down
    for osd_id in 0 1 2 3; do
        while kill "${osd_pids[osd_id]}"; do sleep 1 ; done
        ceph osd down "$osd_id"
    done

    # expand slow devices
    for osd_id in 0 1 2 3; do
        ceph-bluestore-tool --path "$dir/$osd_id" fsck || return 1
    done

    truncate "$dir/0/block" -s 4294967296 # 4GB
    ceph-bluestore-tool --path "$dir/0" bluefs-bdev-expand || return 1
    truncate "$dir/1/block" -s 4311744512 # 4GB + 16MB
    ceph-bluestore-tool --path "$dir/1" bluefs-bdev-expand || return 1
    truncate "$dir/2/block" -s 4295099392 # 4GB + 129KB
    ceph-bluestore-tool --path "$dir/2" bluefs-bdev-expand || return 1
    truncate "$dir/3/block" -s 4293918720 # 4GB - 1MB
    ceph-bluestore-tool --path "$dir/3" bluefs-bdev-expand || return 1

    for osd_id in 0 1 2 3; do
        ceph-bluestore-tool --path "$dir/$osd_id" fsck || return 1
    done

    # informational only: the exit status is not checked
    ceph-bluestore-tool --path "$dir/0" bluefs-bdev-sizes

    # slow, DB, WAL -> slow, DB
    ceph-bluestore-tool --path "$dir/0" \
        --devs-source "$dir/0/block.wal" \
        --dev-target "$dir/0/block.db" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/0" fsck || return 1

    # slow, DB, WAL -> slow, WAL
    ceph-bluestore-tool --path "$dir/1" \
        --devs-source "$dir/1/block.db" \
        --dev-target "$dir/1/block" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/1" fsck || return 1

    # slow, DB, WAL -> slow
    ceph-bluestore-tool --path "$dir/2" \
        --devs-source "$dir/2/block.wal" \
        --devs-source "$dir/2/block.db" \
        --dev-target "$dir/2/block" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/2" fsck || return 1

    # slow, DB, WAL -> slow, WAL (negative case)
    # Migration to WAL is unsupported, so success here is a test failure
    if ceph-bluestore-tool --path "$dir/3" \
        --devs-source "$dir/3/block.db" \
        --dev-target "$dir/3/block.wal" \
        --command bluefs-bdev-migrate; then
        return 1
    fi
    ceph-bluestore-tool --path "$dir/3" fsck || return 1

    # slow, DB, WAL -> slow, DB (WAL to slow then slow to DB)
    ceph-bluestore-tool --path "$dir/3" \
        --devs-source "$dir/3/block.wal" \
        --dev-target "$dir/3/block" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/3" fsck || return 1

    ceph-bluestore-tool --path "$dir/3" \
        --devs-source "$dir/3/block" \
        --dev-target "$dir/3/block.db" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/3" fsck || return 1

    for osd_id in 0 1 2 3; do
        activate_osd "$dir" "$osd_id" || return 1
        osd_pids[osd_id]=$(cat "$dir/osd.$osd_id.pid")
    done

    wait_for_clean || return 1

    # write some objects (the exit status of the bench is not checked)
    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup

    # kill
    for osd_id in 0 1 2 3; do
        while kill "${osd_pids[osd_id]}"; do sleep 1 ; done
        ceph osd down "$osd_id"
    done

    # slow, DB -> slow, DB, WAL
    ceph-bluestore-tool --path "$dir/0" fsck || return 1

    dd if=/dev/zero of="$dir/0/wal" count=512 bs=1M
    ceph-bluestore-tool --path "$dir/0" \
        --dev-target "$dir/0/wal" \
        --command bluefs-bdev-new-wal || return 1

    ceph-bluestore-tool --path "$dir/0" fsck || return 1

    # slow, WAL -> slow, DB, WAL
    ceph-bluestore-tool --path "$dir/1" fsck || return 1

    dd if=/dev/zero of="$dir/1/db" count=1024 bs=1M
    ceph-bluestore-tool --path "$dir/1" \
        --dev-target "$dir/1/db" \
        --command bluefs-bdev-new-db || return 1

    ceph-bluestore-tool --path "$dir/1" \
        --devs-source "$dir/1/block" \
        --dev-target "$dir/1/block.db" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/1" fsck || return 1

    # slow -> slow, DB, WAL
    ceph-bluestore-tool --path "$dir/2" fsck || return 1

    ceph-bluestore-tool --path "$dir/2" \
        --command bluefs-bdev-new-db || return 1

    ceph-bluestore-tool --path "$dir/2" \
        --command bluefs-bdev-new-wal || return 1

    ceph-bluestore-tool --path "$dir/2" \
        --devs-source "$dir/2/block" \
        --dev-target "$dir/2/block.db" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/2" fsck || return 1

    # slow, DB -> slow, WAL
    ceph-bluestore-tool --path "$dir/3" fsck || return 1

    ceph-bluestore-tool --path "$dir/3" \
        --command bluefs-bdev-new-wal || return 1

    ceph-bluestore-tool --path "$dir/3" \
        --devs-source "$dir/3/block.db" \
        --dev-target "$dir/3/block" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/3" fsck || return 1

    for osd_id in 0 1 2 3; do
        activate_osd "$dir" "$osd_id" || return 1
        osd_pids[osd_id]=$(cat "$dir/osd.$osd_id.pid")
    done

    # write some objects (the exit status of the bench is not checked)
    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup

    # kill
    for osd_id in 0 1 2 3; do
        while kill "${osd_pids[osd_id]}"; do sleep 1 ; done
        ceph osd down "$osd_id"
    done

    # slow, DB1, WAL -> slow, DB2, WAL
    ceph-bluestore-tool --path "$dir/0" fsck || return 1

    dd if=/dev/zero of="$dir/0/db2" count=1024 bs=1M
    ceph-bluestore-tool --path "$dir/0" \
        --devs-source "$dir/0/block.db" \
        --dev-target "$dir/0/db2" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/0" fsck || return 1

    # slow, DB, WAL1 -> slow, DB, WAL2
    dd if=/dev/zero of="$dir/0/wal2" count=512 bs=1M
    ceph-bluestore-tool --path "$dir/0" \
        --devs-source "$dir/0/block.wal" \
        --dev-target "$dir/0/wal2" \
        --command bluefs-bdev-migrate || return 1
    rm -rf "$dir/0/wal"

    ceph-bluestore-tool --path "$dir/0" fsck || return 1

    # slow, DB + WAL -> slow, DB2 -> slow
    ceph-bluestore-tool --path "$dir/1" fsck || return 1

    dd if=/dev/zero of="$dir/1/db2" count=1024 bs=1M
    ceph-bluestore-tool --path "$dir/1" \
        --devs-source "$dir/1/block.db" \
        --devs-source "$dir/1/block.wal" \
        --dev-target "$dir/1/db2" \
        --command bluefs-bdev-migrate || return 1

    rm -rf "$dir/1/db"

    ceph-bluestore-tool --path "$dir/1" fsck || return 1

    ceph-bluestore-tool --path "$dir/1" \
        --devs-source "$dir/1/block.db" \
        --dev-target "$dir/1/block" \
        --command bluefs-bdev-migrate || return 1

    rm -rf "$dir/1/db2"

    ceph-bluestore-tool --path "$dir/1" fsck || return 1

    # slow -> slow, DB (negative case)
    ceph-objectstore-tool --type bluestore --data-path "$dir/2" \
        --op fsck --no-mon-config || return 1

    dd if=/dev/zero of="$dir/2/db2" count=1024 bs=1M
    # Migration from slow-only to new device is unsupported, so success
    # here is a test failure
    if ceph-bluestore-tool --path "$dir/2" \
        --devs-source "$dir/2/block" \
        --dev-target "$dir/2/db2" \
        --command bluefs-bdev-migrate; then
        return 1
    fi
    ceph-bluestore-tool --path "$dir/2" fsck || return 1

    # slow + DB + WAL -> slow, DB2
    dd if=/dev/zero of="$dir/2/db2" count=1024 bs=1M

    ceph-bluestore-tool --path "$dir/2" \
        --devs-source "$dir/2/block" \
        --devs-source "$dir/2/block.db" \
        --devs-source "$dir/2/block.wal" \
        --dev-target "$dir/2/db2" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/2" fsck || return 1

    # slow + WAL -> slow2, WAL2
    dd if=/dev/zero of="$dir/3/wal2" count=1024 bs=1M

    ceph-bluestore-tool --path "$dir/3" \
        --devs-source "$dir/3/block" \
        --devs-source "$dir/3/block.wal" \
        --dev-target "$dir/3/wal2" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/3" fsck || return 1

    for osd_id in 0 1 2 3; do
        activate_osd "$dir" "$osd_id" || return 1
        osd_pids[osd_id]=$(cat "$dir/osd.$osd_id.pid")
    done

    # write some objects (the exit status of the bench is not checked)
    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup

    wait_for_clean || return 1
}
# Migrate the DB volume of a single OSD back onto the slow device after
# BlueFS has spilled over onto it.
#
# One OSD is created with a DB device and no WAL device.  Omap-heavy
# write load is repeated (at most six rounds) until the bluefs perf
# counter slow_used_bytes becomes non-zero.  The OSD is then stopped,
# block.db is migrated into block with ceph-bluestore-tool, and the OSD
# is restarted.
#
# Arguments:
#   $1 - test directory; the OSD keeps its data under $1/0
# Returns:
#   0 on success; 1 when no spill-over was observed or a checked step
#   fails.
function TEST_bluestore2() {
    local dir=$1
    local osd_pid0
    local retry=0
    local db_used spilled_over

    local flimit
    flimit=$(ulimit -n)
    # `ulimit -n` may print "unlimited", which is not a valid integer operand
    if [[ "$flimit" != unlimited ]] && [ "$flimit" -lt 1536 ]; then
        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
    fi
    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--bluestore_block_size=4294967296 "
    CEPH_ARGS+="--bluestore_block_db_create=true "
    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
    CEPH_ARGS+="--bluestore_block_wal_create=false "
    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
    CEPH_ARGS+="--osd_pool_default_size=1 "
    CEPH_ARGS+="--osd_pool_default_min_size=1 "
    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    osd_pid0=$(cat "$dir/osd.0.pid")

    sleep 5
    create_pool foo 16

    while [[ $retry -le 5 ]]; do
        # write some objects (the exit status of the bench is not checked)
        timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup

        #give RocksDB some time to cooldown and put files to slow level(s)
        sleep 10

        db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" )
        spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" )
        ((retry+=1))
        echo "attempt $retry: db_used_bytes=$db_used slow_used_bytes=$spilled_over"
        # stop retrying as soon as something has landed on the slow device
        test "$spilled_over" -eq 0 || break
    done
    test "$spilled_over" -gt 0 || return 1

    # keep signalling the OSD until kill itself fails, then mark it down
    while kill "$osd_pid0"; do sleep 1 ; done
    ceph osd down 0

    ceph-bluestore-tool --path "$dir/0" \
        --devs-source "$dir/0/block.db" \
        --dev-target "$dir/0/block" \
        --command bluefs-bdev-migrate || return 1

    ceph-bluestore-tool --path "$dir/0" \
        --command bluefs-bdev-sizes || return 1

    ceph-bluestore-tool --path "$dir/0" \
        --command fsck || return 1

    activate_osd "$dir" 0 || return 1
    osd_pid0=$(cat "$dir/osd.0.pid")

    wait_for_clean || return 1
}
# Check that bluefs-bdev-expand keeps the allocation file consistent.
#
# One OSD is created and loaded with data, then stopped.  Offline, the
# allocation map is written out (allocmap), the block file is truncated
# to the requested size and bluefs-bdev-expand is run, followed by fsck
# and qfsck.  After the OSD is restarted, the slow device's total size
# must equal the requested size, and at most 128KB of the added space
# may already be in use.  A final qfsck runs after stopping the OSD again.
#
# Arguments:
#   $1 - test directory; the OSD keeps its data under $1/0 and the tool
#        log is written to $1/bluestore_tool.log
# Returns:
#   0 on success; 1 when a checked step or one of the size checks fails.
function TEST_bluestore_expand() {
    local dir=$1
    local tool_log="$dir/bluestore_tool.log"
    local osd_pid0
    local total_space_before free_space_before
    local total_space_after free_space_after
    local requested_space total_space_added free_space_added new_used_space

    local flimit
    flimit=$(ulimit -n)
    # `ulimit -n` may print "unlimited", which is not a valid integer operand
    if [[ "$flimit" != unlimited ]] && [ "$flimit" -lt 1536 ]; then
        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
    fi
    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--bluestore_block_size=4294967296 "
    CEPH_ARGS+="--bluestore_block_db_create=true "
    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
    CEPH_ARGS+="--bluestore_block_wal_create=false "
    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
    CEPH_ARGS+="--osd_pool_default_size=1 "
    CEPH_ARGS+="--osd_pool_default_min_size=1 "
    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    osd_pid0=$(cat "$dir/osd.0.pid")

    sleep 5
    create_pool foo 16

    # write some objects (the exit status of the bench is not checked)
    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup
    sleep 5

    total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
    # pick the number out of the "free" line that follows BDEV_SLOW
    free_space_before=$(ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2)

    # kill: keep signalling the OSD until kill itself fails, then mark it down
    while kill "$osd_pid0"; do sleep 1 ; done
    ceph osd down 0

    # destage allocation to file before expand (in case fast-shutdown skipped that step)
    ceph-bluestore-tool --log-file "$tool_log" --path "$dir/0" allocmap || return 1

    # expand slow devices
    ceph-bluestore-tool --log-file "$tool_log" --path "$dir/0" fsck || return 1

    # NOTE(review): this equals the --bluestore_block_size set above, so the
    # truncate may not actually grow the file -- confirm this is intended.
    requested_space=4294967296 # 4GB
    truncate "$dir/0/block" -s "$requested_space"
    ceph-bluestore-tool --log-file "$tool_log" --path "$dir/0" bluefs-bdev-expand || return 1

    # verify the store after the expand
    ceph-bluestore-tool --log-file "$tool_log" --path "$dir/0" fsck || return 1

    # compare allocation-file with RocksDB state
    ceph-bluestore-tool --log-file "$tool_log" --path "$dir/0" qfsck || return 1

    # informational only: the exit status is not checked
    ceph-bluestore-tool --log-file "$tool_log" --path "$dir/0" bluefs-bdev-sizes

    activate_osd "$dir" 0 || return 1
    osd_pid0=$(cat "$dir/osd.0.pid")

    wait_for_clean || return 1

    total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
    free_space_after=$(ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2)

    # String comparison on purpose: an empty or non-numeric reading must
    # fail the check instead of making the test operator error out.
    if [ "$total_space_after" != "$requested_space" ]; then
        echo "total_space_after = $total_space_after"
        echo "requested_space = $requested_space"
        return 1
    fi

    total_space_added=$((total_space_after - total_space_before))
    free_space_added=$((free_space_after - free_space_before))

    new_used_space=$((total_space_added - free_space_added))
    echo "$new_used_space"
    # allow upto 128KB to be consumed
    if [ "$new_used_space" -gt 131072 ]; then
        echo "total_space_added = $total_space_added"
        echo "free_space_added = $free_space_added"
        return 1
    fi

    # kill
    while kill "$osd_pid0"; do sleep 1 ; done
    ceph osd down 0

    ceph-bluestore-tool --log-file "$tool_log" --path "$dir/0" qfsck || return 1
}
# Hand over to the shared test driver, forwarding the command-line
# arguments.  `main` is not defined in this file; presumably it comes from
# the sourced ceph-helpers.sh -- confirm there.
main osd-bluefs-volume-ops "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bluefs-volume-ops.sh"
# End: