ceph/qa/workunits/rados/test_crash.sh

#!/bin/sh

# trace every command as it is executed
set -x

# run on a single-node three-OSD cluster

# send SIGABRT to every ceph-osd process; the coredumps this leaves
# behind are dealt with further down in this script
sudo killall -ABRT ceph-osd
# NOTE(review): fixed 5s pause, presumably to let the daemons finish
# dying and writing their dumps -- confirm this is long enough
sleep 5

# kill caused coredumps; find them and delete them, carefully, so as
# not to disturb other coredumps, or else teuthology will see them
# and assume test failure.  sudos are because the core files are
# root/600

# is_osd_abort_core GDB_OUTPUT
#   GDB_OUTPUT: the text gdb printed after loading a core file.
#   Returns 0 when that text says the core was generated by ceph-osd
#   and the program was terminated by signal 6 / SIGABRT, 1 otherwise.
#   Writes nothing to stdout.  Implemented with POSIX case patterns
#   instead of "expr match", which is not POSIX and echoes the match
#   length on every call.
is_osd_abort_core() {
	case "$1" in
		*generated*ceph-osd*) ;;
		*) return 1 ;;
	esac
	case "$1" in
		*terminated*"signal 6"*) return 0 ;;
		*terminated*"signal SIGABRT"*) return 0 ;;
	esac
	return 1
}

# read find's output one line at a time so that a path containing
# whitespace or glob characters is handled as a single file name
find "$TESTDIR/archive/coredump" -type f | while IFS= read -r f; do
	# "quit" on stdin makes gdb exit right after it has loaded the core
	gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd "$f")
	if is_osd_abort_core "$gdb_output"; then
		# stdin is redirected so rm/sudo can never consume the
		# remaining paths queued on the loop's input
		sudo rm -- "$f" </dev/null
	fi
done

# ceph-crash runs as the unprivileged "ceph" user, but when under test
# the ceph osd daemons are running as root, so their crash files aren't
# readable.  let's chown them so they behave as they would in real life.
# (-R: apply the ownership change to everything under the crash directory)
sudo chown -R ceph:ceph /var/lib/ceph/crash

# let daemon find crashdumps on startup
sudo systemctl restart ceph-crash
# NOTE(review): fixed 30s wait, presumably to give ceph-crash time to
# process the dumps before the script continues -- confirm this is enough
sleep 30

# must be 3 crashdumps registered and moved to crash/posted
# (the counts are quoted and compared numerically, so the test stays
# well-formed whatever padding wc puts around the number)
[ "$(ceph crash ls | wc -l)" -eq 4 ] || exit 1   # 4 here bc of the table header
[ "$(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l)" -eq 3 ] || exit 1

# there should be a health warning
ceph health detail | grep RECENT_CRASH || exit 1
# archive every crash; after the wait the warning is expected to be gone
ceph crash archive-all
sleep 30
# grep -c prints how many lines mention RECENT_CRASH; -x requires that
# count to be exactly "0" (a bare "grep 0" would also accept 10, 20, ...)
ceph health detail | grep -c RECENT_CRASH | grep -x 0 || exit 1     # should be gone!
qa/suites/rados, qa/workunits/rados: Add suite/workunit for ceph-crash Signed-off-by: Dan Mick <dan.mick@redhat.com> 2018-08-03 03:20:41 +00:00			`#!/bin/sh`

			`set -x`

			`# run on a single-node three-OSD cluster`

			`sudo killall -ABRT ceph-osd`
			`sleep 5`

			`# kill caused coredumps; find them and delete them, carefully, so as`
			`# not to disturb other coredumps, or else teuthology will see them`
			`# and assume test failure. sudos are because the core files are`
			`# root/600`
			`for f in $(find $TESTDIR/archive/coredump -type f); do`
			`gdb_output=$(echo "quit" \| sudo gdb /usr/bin/ceph-osd $f)`
			`if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \`
			`( \`

			`expr match "$gdb_output" ".*terminated.*signal 6.*" \|\| \`
			`expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \`
			`)`
			`then`
			`sudo rm $f`
			`fi`
			`done`

qa/workunits/rados/test_crash: chown crash files to ceph user Fixes: https://tracker.ceph.com/issues/58098 Signed-off-by: Tim Serong <tserong@suse.com> 2022-12-08 00:09:16 +00:00			`# ceph-crash runs as the unprivileged "ceph" user, but when under test`
			`# the ceph osd daemons are running as root, so their crash files aren't`
			`# readable. let's chown them so they behave as they would in real life.`
			`sudo chown -R ceph:ceph /var/lib/ceph/crash`

qa/suites/rados, qa/workunits/rados: Add suite/workunit for ceph-crash Signed-off-by: Dan Mick <dan.mick@redhat.com> 2018-08-03 03:20:41 +00:00			`# let daemon find crashdumps on startup`
			`sudo systemctl restart ceph-crash`
			`sleep 30`

			`# must be 3 crashdumps registered and moved to crash/posted`
mgr/crash: make 'crash ls' a nice table with a NEW column Signed-off-by: Sage Weil <sage@redhat.com> 2019-07-12 21:34:37 +00:00			`[ $(ceph crash ls \| wc -l) = 4 ] \|\| exit 1 # 4 here bc of the table header`
qa/suites/rados, qa/workunits/rados: Add suite/workunit for ceph-crash Signed-off-by: Dan Mick <dan.mick@redhat.com> 2018-08-03 03:20:41 +00:00			`[ $(sudo find /var/lib/ceph/crash/posted/ -name meta \| wc -l) = 3 ] \|\| exit 1`
qa/workunits/rados/test_crash: health check Signed-off-by: Sage Weil <sage@redhat.com> 2019-07-16 14:26:21 +00:00
			`# there should be a health warning`
			`ceph health detail \| grep RECENT_CRASH \|\| exit 1`
			`ceph crash archive-all`
			`sleep 30`
			`ceph health detail \| grep -c RECENT_CRASH \| grep 0 # should be gone!`