#!/bin/sh

set -x

# run on a single-node three-OSD cluster

sudo killall -ABRT ceph-osd
sleep 5
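# (context: aborting the OSDs should leave both a kernel coredump and a
# crash report under /var/lib/ceph/crash written by the daemon's crash
# handler; the rest of this test exercises both artifacts)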
# kill caused coredumps; find them and delete them, carefully, so as
# not to disturb other coredumps, or else teuthology will see them
# and assume test failure. sudos are because the core files are
# root/600
for f in $(find $TESTDIR/archive/coredump -type f); do
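    # (note: the patterns below assume gdb's usual core-file banner, e.g.
    # "Core was generated by .../ceph-osd" and "Program terminated with
    # signal SIGABRT"; older gdb prints "signal 6", hence both spellings)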
    gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f)
    if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \
       ( \
         expr match "$gdb_output" ".*terminated.*signal 6.*" || \
         expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \
       )
    then
        sudo rm $f
    fi
done
# ceph-crash runs as the unprivileged "ceph" user, but when under test
# the ceph osd daemons are running as root, so their crash files aren't
# readable. let's chown them so they behave as they would in real life.
sudo chown -R ceph:ceph /var/lib/ceph/crash
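# (for context: the ceph-crash daemon periodically scans /var/lib/ceph/crash
# and posts any new dump to the cluster, roughly "ceph crash post -i <meta>",
# moving it into crash/posted on success; the posted/ check further down
# relies on that behaviour)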
# let daemon find crashdumps on startup
sudo systemctl restart ceph-crash
sleep 30
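# (30 seconds is assumed to be enough for ceph-crash to pick up and post
# all three dumps after the restart)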
# must be 3 crashdumps registered and moved to crash/posted
[ $(ceph crash ls | wc -l) = 4 ] || exit 1    # 4 here because of the table header
[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1
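# a stricter form of the same count check, left commented out as a sketch
# since it assumes jq is available on the test node:
#   [ $(ceph crash ls --format json | jq length) = 3 ] || exit 1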
# there should be a health warning
ceph health detail | grep RECENT_CRASH || exit 1
ceph crash archive-all
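# (archive-all marks every crash as acknowledged; RECENT_CRASH only covers
# unarchived recent crashes, so the warning should clear once the mgr
# refreshes its health checks, hence the wait below)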
sleep 30
ceph health detail | grep -c RECENT_CRASH | grep 0 # should be gone!