qa: Add subvolume clone and snapshot rm tests when osd is full

Fixes: https://tracker.ceph.com/issues/55976
Signed-off-by: Kotresh HR <khiremat@redhat.com>

parent ec57215d50, commit a64f049614
@@ -15,7 +15,17 @@ overrides:
         bluestore block size: 1073741824
 tasks:
 - workunit:
-    cleanup: false
+    cleanup: true
     clients:
       client.0:
         - fs/full/subvolume_rm.sh
+- workunit:
+    cleanup: true
+    clients:
+      client.0:
+        - fs/full/subvolume_clone.sh
+- workunit:
+    cleanup: true
+    clients:
+      client.0:
+        - fs/full/subvolume_snapshot_rm.sh
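The workunit task above runs each listed script on client.0; the scripts locate the client mount through $CEPH_MNT. As a minimal local sketch (assuming a dev cluster with CephFS already mounted; the mount point below is illustrative, not part of the suite), a script can also be invoked directly:

    CEPH_MNT=/mnt/cephfs bash qa/workunits/fs/full/subvolume_clone.sh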
qa/workunits/fs/full/subvolume_clone.sh (new executable file, 114 lines)
@@ -0,0 +1,114 @@
#!/usr/bin/env bash
set -ex

# This testcase tests 'ceph fs subvolume snapshot clone' when the osd is full.
# The clone fails with 'MetadataMgrException: -28 (error in write)' and
# truncates the config file of the corresponding subvolume while updating it.
# Hence subsequent subvolume commands on the clone fail with a
# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback.

# The osd is of size 1GB. The full-ratios are set so that the osd is treated
# as full at around 600MB. The subvolume is created and 100MB is written.
# The subvolume is snapshotted and cloned ten times. Since the clone delay is
# set to 15 seconds, all the clones reach the pending state for sure. Of the
# ten clones, only a few succeed and the rest fail with ENOSPC.

# At this stage, the ".meta" config file of each failed clone is checked for
# truncation, and the clone status command is checked for a traceback.

# Note that the failed clones stay in a retry loop, so their state remains
# 'pending' or 'in-progress'. The state is not updated to 'failed' because
# the config update gets ENOSPC too.
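# For reference, a healthy ".meta" file is an ini-style config kept by the
# mgr/volumes plugin; the truncation bug reduces it to an empty file. A rough
# sketch of its shape (key names and values here are illustrative and vary
# by release):
#
#   [GLOBAL]
#   version = 2
#   type = clone
#   path = /volumes/_nogroup/clone_1/<uuid>
#   state = pending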
# Helpers: ignore_failure swallows a command's exit status; expect_failure
# inverts it (succeeds only if the command fails).
ignore_failure() {
    if "$@"; then return 0; else return 0; fi
}

expect_failure() {
    if "$@"; then return 1; else return 0; fi
}

# Unique per-run suffix for the /tmp scratch files referenced below.
PID=$$
NUM_CLONES=10

ceph fs subvolume create cephfs sub_0
subvol_path_0=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)

# For debugging
echo "Before ratios are set"
df $CEPH_MNT
ceph osd df

ceph osd set-full-ratio 0.6
ceph osd set-nearfull-ratio 0.50
ceph osd set-backfillfull-ratio 0.55

# For debugging
echo "After ratios are set"
df -h
ceph osd df

# Write ~100MB into the subvolume as 100 x 1MB files
for i in {1..100}
do
    sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync
done

# For debugging
echo "After subvolumes are written"
df -h $CEPH_MNT
ceph osd df

# Snapshot the subvolume
ceph fs subvolume snapshot create cephfs sub_0 snap_0

# Set the clone snapshot delay so every clone queues as 'pending' first
ceph config set mgr mgr/volumes/snapshot_clone_delay 15

# Schedule the clones; some will fail with no space
for i in $(eval echo {1..$NUM_CLONES})
do
    ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i
done
# Wait for the osd to become full
timeout=90
while [ $timeout -gt 0 ]
do
    health=$(ceph health detail)
    [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
    echo "Waiting for osd to be full: $timeout"
    sleep 1
    let "timeout-=1"
done
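# When the full ratio trips, 'ceph health detail' is expected to include an
# OSD_FULL entry, roughly like the following (illustrative output; the exact
# layout varies by release):
#   HEALTH_ERR 1 full osd(s)
#   [ERR] OSD_FULL: 1 full osd(s)
#       osd.0 is full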
# For debugging
echo "After osd is full"
df -h $CEPH_MNT
ceph osd df

# Check the clone status; this should not crash
for i in $(eval echo {1..$NUM_CLONES})
do
    ignore_failure ceph fs clone status cephfs clone_$i >/tmp/out_${PID}_file 2>/tmp/error_${PID}_file
    cat /tmp/error_${PID}_file
    if grep "complete" /tmp/out_${PID}_file; then
        echo "The clone_$i is completed"
    else
        # In-progress/pending clones: no traceback should be found in stderr
        echo clone_$i in PENDING/IN-PROGRESS
        expect_failure sudo grep "Traceback" /tmp/error_${PID}_file
        # The config file should not be truncated and the GLOBAL section should exist
        sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/clone_$i/.meta
    fi
done
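# For reference, 'ceph fs clone status' prints JSON whose state field drives
# the grep above; a rough sketch of the shape (fields are illustrative and
# vary by release):
#   {
#     "status": {
#       "state": "in-progress"
#     }
#   }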
# Hard cleanup from the backend
ignore_failure sudo rm -rf $CEPH_MNT/_index/clone/*
ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/clone_*
ignore_failure sudo rmdir $CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0
ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0

# Set the ratios back for the other full tests to run
ceph osd set-full-ratio 0.95
ceph osd set-nearfull-ratio 0.95
ceph osd set-backfillfull-ratio 0.95

# After test
echo "After test"
df -h $CEPH_MNT
ceph osd df

echo OK
@@ -59,4 +59,14 @@ do
     let "timeout-=1"
 done
 
+#Set the ratios back for other full tests to run
+ceph osd set-full-ratio 0.95
+ceph osd set-nearfull-ratio 0.95
+ceph osd set-backfillfull-ratio 0.95
+
+#After test
+echo "After test"
+df -h
+ceph osd df
+
 echo OK
qa/workunits/fs/full/subvolume_snapshot_rm.sh (new executable file, 84 lines)
@@ -0,0 +1,84 @@
#!/usr/bin/env bash
set -ex

# This testcase tests 'ceph fs subvolume snapshot rm' when the osd is full.
# The snapshot rm fails with 'MetadataMgrException: -28 (error in write)' and
# truncates the config file of the corresponding subvolume. Hence a subsequent
# snapshot rm of the same snapshot fails with a
# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback.

# The osd is of size 1GB. The subvolume is created and an 800MB file is
# written. Then the full-ratios are set so that the osd is treated as full at
# around 200MB. The subvolume snapshot is taken, which succeeds as no extra
# space is required for a snapshot. Now, removal of the snapshot fails with
# ENOSPC as it fails to remove the snapshot metadata set. The snapshot
# removal fails, but it should neither traceback nor truncate the config file.
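# Full-mark arithmetic for this test (approximate, based on the suite's
# 'bluestore block size: 1073741824' override, i.e. a 1GiB osd):
#   set-full-ratio 0.2  => full at ~0.2 * 1GiB ≈ 205MB
#   data written: 800MB >> 205MB, so OSD_FULL is raised once the ratio drops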
# Helpers: expect_failure inverts a command's exit status (succeeds only if
# the command fails); ignore_failure swallows it.
expect_failure() {
    if "$@"; then return 1; else return 0; fi
}

ignore_failure() {
    if "$@"; then return 0; else return 0; fi
}

# Unique per-run suffix for the /tmp scratch file referenced below.
PID=$$

ceph fs subvolume create cephfs sub_0
subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)

# For debugging
echo "Before write"
df $CEPH_MNT
ceph osd df

# Write an 800MB file, then set the full ratio to around 200MB
ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync

ceph osd set-full-ratio 0.2
ceph osd set-nearfull-ratio 0.16
ceph osd set-backfillfull-ratio 0.18
# Wait for the osd to become full
timeout=30
while [ $timeout -gt 0 ]
do
    health=$(ceph health detail)
    [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
    echo "Waiting for osd to be full: $timeout"
    sleep 1
    let "timeout-=1"
done
# Take snapshot
ceph fs subvolume snapshot create cephfs sub_0 snap_0

# Snapshot rm fails, but it should not throw a traceback
expect_failure ceph fs subvolume snapshot rm cephfs sub_0 snap_0 2>/tmp/error_${PID}_file
cat /tmp/error_${PID}_file

# No traceback should be found
expect_failure grep "Traceback" /tmp/error_${PID}_file

# Validate that the config file is not truncated and the GLOBAL section exists
sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/sub_0/.meta

# For debugging
echo "After write"
df $CEPH_MNT
ceph osd df

# Cleanup from the backend
ignore_failure sudo rm -f /tmp/error_${PID}_file
ignore_failure sudo rmdir $CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0
ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0

# Set the ratios back for the other full tests to run
ceph osd set-full-ratio 0.95
ceph osd set-nearfull-ratio 0.95
ceph osd set-backfillfull-ratio 0.95

# After test
echo "After test"
df -h $CEPH_MNT
ceph osd df

echo OK