tests/osd: creating a Teuthology test re missing SnapMapper entries
The test (in the standalone/scrub suite) verifies that the scrubber detects, and issues a cluster-log error for, a mapping entry ("SNA_") that is missing from the SnapMapper DB. Specifically, here the entry is corrupted: it is shortened, as per https://tracker.ceph.com/issues/56147.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
commit 84d9c4d177
parent dc254da593
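In outline, the corruption step works on an offline OSD's BlueStore key-value store: it looks up the SnapMapper mapping key ("SNA_...") for snapid 3 (the snap13 clone), saves its value, and re-inserts that value under a truncated key. A condensed sketch of that step, extracted from the loop in the test below ($kvdir stands for one OSD's data directory; the /tmp/the_val path is illustrative):

    # find the mapping key for snapid 3 (the 'snap13' clone)
    KY=$(ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null |
         grep -a 'SNA_[0-9]_0000000000000003_' | awk '{print $2}')

    # save the value, then re-insert it under a key shortened by 30 characters
    ceph-kvstore-tool bluestore-kv $kvdir get p "$KY" out /tmp/the_val
    ceph-kvstore-tool bluestore-kv $kvdir rm  p "$KY"
    ceph-kvstore-tool bluestore-kv $kvdir set p "${KY:0:-30}" in /tmp/the_val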
qa/standalone/scrub/osd-mapper.sh (new executable file, 158 lines added)

@@ -0,0 +1,158 @@
#!/usr/bin/env bash
# -*- mode:text; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
# vim: ts=8 sw=2 smarttab
#
# test the handling of a corrupted SnapMapper DB by Scrub

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

# one clone & multiple snaps (according to the number of parameters)
function make_a_clone()
{
    # turn off '-x' (but remember the previous state)
    local saved_echo_flag=${-//[^x]/}
    set +x
    local pool=$1
    local obj=$2
    echo $RANDOM | rados -p $pool put $obj - || return 1
    shift 2
    for snap in $@ ; do
        rados -p $pool mksnap $snap || return 1
    done
    if [[ -n "$saved_echo_flag" ]]; then set -x; fi
}

function TEST_truncated_sna_record() {
    local dir=$1
    local -A cluster_conf=(
        ['osds_num']="3"
        ['pgs_in_pool']="4"
        ['pool_name']="test"
    )

    local extr_dbg=1
    (( extr_dbg > 1 )) && echo "Dir: $dir"
    standard_scrub_cluster $dir cluster_conf
    ceph tell osd.* config set osd_stats_update_period_not_scrubbing "1"
    ceph tell osd.* config set osd_stats_update_period_scrubbing "1"

    local osdn=${cluster_conf['osds_num']}
    local poolid=${cluster_conf['pool_id']}
    local poolname=${cluster_conf['pool_name']}
    local objname="objxxx"

    # create an object and clone it
    make_a_clone $poolname $objname snap01 snap02 || return 1
    make_a_clone $poolname $objname snap13 || return 1
    make_a_clone $poolname $objname snap24 snap25 || return 1
    echo $RANDOM | rados -p $poolname put $objname - || return 1

    # identify the PG and the primary OSD
    local pgid=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.pgid'`
    local osd=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'`
    echo "pgid is $pgid (primary: osd.$osd)"
    # turn on the publishing of test data in the 'scrubber' section of 'pg query' output
    set_query_debug $pgid

    # verify the existence of these clones
    (( extr_dbg >= 1 )) && rados --format json-pretty -p $poolname listsnaps $objname

    # scrub the PG
    ceph pg $pgid deep_scrub || return 1

    # we aren't just waiting for the scrub to terminate, but also for the
    # logs to be published
    sleep 3
    ceph pg dump pgs
    until grep -a -q -- "event: --^^^^---- ScrubFinished" $dir/osd.$osd.log ; do
        sleep 0.2
    done

    ceph pg dump pgs
    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1
    sleep 5
    # verify that no scrub errors were logged so far
    ! grep -a -q "ERR" $dir/osd.$osd.log || return 1

    # kill the OSDs
    kill_daemons $dir TERM osd || return 1

    (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/0 dump "p"
    (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump "p" | grep -a SNA_
    (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump p 2> /dev/null

    for sdn in $(seq 0 $(expr $osdn - 1))
    do
        kvdir=$dir/$sdn
        echo "corrupting the SnapMapper DB of osd.$sdn (db: $kvdir)"
        (( extr_dbg >= 3 )) && ceph-kvstore-tool bluestore-kv $kvdir dump "p"

        # truncate the 'mapping' (SNA_) entry corresponding to the snap13 clone
        KY=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_0000000000000003_000000000000000' \
              | awk -e '{print $2;}'`
        (( extr_dbg >= 1 )) && echo "SNA key: $KY" | cat -v

        tmp_fn1=`mktemp -p /tmp --suffix="_the_val"`
        (( extr_dbg >= 1 )) && echo "Value dumped in: $tmp_fn1"
        ceph-kvstore-tool bluestore-kv $kvdir get p "$KY" out $tmp_fn1 2> /dev/null
        (( extr_dbg >= 2 )) && od -xc $tmp_fn1

        # re-insert the saved value under a shortened (corrupted) key
        NKY=${KY:0:-30}
        ceph-kvstore-tool bluestore-kv $kvdir rm "p" "$KY" 2> /dev/null
        ceph-kvstore-tool bluestore-kv $kvdir set "p" "$NKY" in $tmp_fn1 2> /dev/null

        (( extr_dbg >= 1 )) || rm $tmp_fn1
    done

    orig_osd_args=" ${cluster_conf['osd_args']}"
    orig_osd_args=" $(echo $orig_osd_args)"
    (( extr_dbg >= 2 )) && echo "Copied OSD args: /$orig_osd_args/ /${orig_osd_args:1}/"
    for sdn in $(seq 0 $(expr $osdn - 1))
    do
        CEPH_ARGS="$CEPH_ARGS $orig_osd_args" activate_osd $dir $sdn
    done
    sleep 1

    for sdn in $(seq 0 $(expr $osdn - 1))
    do
        timeout 60 ceph tell osd.$sdn version
    done

    # scrubbing now is expected to emit a cluster-log ERR message regarding
    # the SnapMapper internal inconsistency
    ceph osd unset nodeep-scrub || return 1
    ceph osd unset noscrub || return 1

    # what is the primary now?
    local cur_prim=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'`
    ceph pg dump pgs
    sleep 2
    ceph pg $pgid deep_scrub || return 1
    sleep 5
    ceph pg dump pgs
    (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log
    grep -a -q "ERR" $dir/osd.$cur_prim.log || return 1
}


main osd-mapper "$@"
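
As with the rest of the standalone scrub suite, the new test is meant to be driven by the standalone harness. Assuming the usual qa/run-standalone.sh wrapper and a built source tree, a typical invocation from the build directory would be:

    # run only this test file (the wrapper sets up CEPH_ROOT and the helpers)
    ../qa/run-standalone.sh osd-mapper.sh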

qa/standalone/scrub/scrub-helpers.sh

@@ -243,7 +243,7 @@ function standard_scrub_cluster() {
 
     for osd in $(seq 0 $(expr $OSDS - 1))
     do
-        run_osd $dir $osd $ceph_osd_args || return 1
+        run_osd $dir $osd $(echo $ceph_osd_args) || return 1
     done
 
     create_pool $poolname $pg_num $pg_num
@@ -254,6 +254,7 @@ function standard_scrub_cluster() {
     name_n_id=`ceph osd dump | awk '/^pool.*'$poolname'/ { gsub(/'"'"'/," ",$3); print $3," ", $2}'`
     echo "standard_scrub_cluster: $debug_msg: test pool is $name_n_id"
     args['pool_id']="${name_n_id##* }"
+    args['osd_args']=$ceph_osd_args
     if [[ -n "$saved_echo_flag" ]]; then set -x; fi
 }
 
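The new 'osd_args' entry records the arguments the OSDs were originally started with, so a test can bring them back up unchanged after kill_daemons. A minimal usage sketch, mirroring the restart loop in osd-mapper.sh above:

    declare -A cluster_conf=( ['osds_num']="3" ['pgs_in_pool']="4" ['pool_name']="test" )
    standard_scrub_cluster $dir cluster_conf

    # ... corrupt data / stop daemons ... then reactivate each OSD with the saved args
    orig_osd_args=" $(echo ${cluster_conf['osd_args']})"   # 'echo' squeezes whitespace
    for sdn in $(seq 0 $(( ${cluster_conf['osds_num']} - 1 ))); do
        CEPH_ARGS="$CEPH_ARGS $orig_osd_args" activate_osd $dir $sdn
    done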

src/osd/scrubber/scrub_backend.cc

@@ -1892,13 +1892,15 @@ std::optional<snap_mapper_fix_t> ScrubBackend::scan_object_snaps(
 }
 
 /*
  * Process:
  * Building a map of objects suitable for snapshot validation.
- * The data in m_cleaned_meta_map is the leftover partial items that need to
- * be completed before they can be processed.
- *
- * Snapshots in maps precede the head object, which is why we are scanning
- * backwards.
+ * We are moving all "full" clone sets, i.e. the head and (preceding it, as
+ * snapshots precede the head entry) the clone entries, into 'for_meta_scrub'.
+ * That collection, not containing partial items, will be scrubbed by
+ * scrub_snapshot_metadata().
+ *
+ * What's left in m_cleaned_meta_map is the leftover partial items that need to
+ * be completed before they can be processed.
  */
 ScrubMap ScrubBackend::clean_meta_map(ScrubMap& cleaned, bool max_reached)
 {