Merge pull request #17920 from dzafman/wip-21382

Erasure code recovery should send additional reads if necessary

Fixes: http://tracker.ceph.com/issues/21382
Reviewed-by: Kefu Chai <kchai@redhat.com>

Merge commit: 2f466f8b26
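
In outline, the fix lets an erasure-coded read issued for recovery fall back to the remaining readable shards when some of its initial shard reads fail, instead of giving up; decoding only needs any k of the k + m shards. A minimal standalone sketch of that property, using the profiles exercised by the tests below (k=2 m=1 and, for the new recovery test, k=3 m=2); the names here (can_decode, readable_shards) are illustrative only and are not part of ECBackend:

#include <cstddef>
#include <set>

// An object in a k+m erasure-coded pool is striped across k data shards and
// m coding shards; any k readable shards are enough to reconstruct it.
inline bool can_decode(std::size_t k, const std::set<int> &readable_shards) {
  return readable_shards.size() >= k;
}

// The bug: a recovery read that hit an error on one of its first k shard
// reads gave up even though can_decode() still held for the surviving
// shards. The change below lets it issue additional reads instead.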
@@ -43,7 +43,10 @@ function run() {
 }
 
 function setup_osds() {
-    for id in $(seq 0 3) ; do
+    local count=$1
+    shift
+
+    for id in $(seq 0 $(expr $count - 1)) ; do
         run_osd $dir $id || return 1
     done
     wait_for_clean || return 1
@@ -55,10 +58,15 @@ function setup_osds() {
 
 function create_erasure_coded_pool() {
     local poolname=$1
+    shift
+    local k=$1
+    shift
+    local m=$1
+    shift
 
     ceph osd erasure-code-profile set myprofile \
         plugin=jerasure \
-        k=2 m=1 \
+        k=$k m=$m \
         crush-failure-domain=osd || return 1
     create_pool $poolname 1 1 erasure myprofile \
         || return 1
@@ -129,13 +137,13 @@ function rados_put_get() {
         # recovery didn't crash the primary.
         #
         local -a initial_osds=($(get_osds $poolname $objname))
-        local last=$((${#initial_osds[@]} - 1))
+        local last_osd=${initial_osds[-1]}
         # Kill OSD
-        kill_daemons $dir TERM osd.${initial_osds[$last]} >&2 < /dev/null || return 1
-        ceph osd out ${initial_osds[$last]} || return 1
-        ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1
-        ceph osd in ${initial_osds[$last]} || return 1
-        run_osd $dir ${initial_osds[$last]} || return 1
+        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+        ceph osd out ${last_osd} || return 1
+        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
+        ceph osd in ${last_osd} || return 1
+        run_osd $dir ${last_osd} || return 1
         wait_for_clean || return 1
     fi
 
@@ -269,10 +277,10 @@ function rados_get_data_bad_size() {
 #
 function TEST_rados_get_subread_eio_shard_0() {
     local dir=$1
-    setup_osds || return 1
+    setup_osds 4 || return 1
 
     local poolname=pool-jerasure
-    create_erasure_coded_pool $poolname || return 1
+    create_erasure_coded_pool $poolname 2 1 || return 1
     # inject eio on primary OSD (0) and replica OSD (1)
     local shard_id=0
     rados_get_data eio $dir $shard_id || return 1
@@ -281,10 +289,10 @@ function TEST_rados_get_subread_eio_shard_0() {
 
 function TEST_rados_get_subread_eio_shard_1() {
     local dir=$1
-    setup_osds || return 1
+    setup_osds 4 || return 1
 
     local poolname=pool-jerasure
-    create_erasure_coded_pool $poolname || return 1
+    create_erasure_coded_pool $poolname 2 1 || return 1
     # inject eio into replicas OSD (1) and OSD (2)
     local shard_id=1
     rados_get_data eio $dir $shard_id || return 1
@@ -296,10 +304,10 @@ function TEST_rados_get_subread_eio_shard_1() {
 
 function TEST_rados_get_subread_missing() {
     local dir=$1
-    setup_osds || return 1
+    setup_osds 4 || return 1
 
     local poolname=pool-jerasure
-    create_erasure_coded_pool $poolname || return 1
+    create_erasure_coded_pool $poolname 2 1 || return 1
     # inject remove into replicas OSD (1) and OSD (2)
     local shard_id=1
     rados_get_data remove $dir $shard_id || return 1
@@ -316,10 +324,10 @@ function TEST_rados_get_subread_missing() {
 #
 function TEST_rados_get_bad_size_shard_0() {
     local dir=$1
-    setup_osds || return 1
+    setup_osds 4 || return 1
 
     local poolname=pool-jerasure
-    create_erasure_coded_pool $poolname || return 1
+    create_erasure_coded_pool $poolname 2 1 || return 1
     # Set incorrect size into primary OSD (0) and replica OSD (1)
     local shard_id=0
     rados_get_data_bad_size $dir $shard_id 10 || return 1
@@ -330,10 +338,10 @@ function TEST_rados_get_bad_size_shard_0() {
 
 function TEST_rados_get_bad_size_shard_1() {
     local dir=$1
-    setup_osds || return 1
+    setup_osds 4 || return 1
 
     local poolname=pool-jerasure
-    create_erasure_coded_pool $poolname || return 1
+    create_erasure_coded_pool $poolname 2 1 || return 1
     # Set incorrect size into replicas OSD (1) and OSD (2)
     local shard_id=1
     rados_get_data_bad_size $dir $shard_id 10 || return 1
@@ -346,10 +354,10 @@ function TEST_rados_get_with_subreadall_eio_shard_0() {
     local dir=$1
     local shard_id=0
 
-    setup_osds || return 1
+    setup_osds 4 || return 1
 
     local poolname=pool-jerasure
-    create_erasure_coded_pool $poolname || return 1
+    create_erasure_coded_pool $poolname 2 1 || return 1
     # inject eio on primary OSD (0)
     local shard_id=0
     rados_get_data_recovery eio $dir $shard_id || return 1
@@ -361,10 +369,10 @@ function TEST_rados_get_with_subreadall_eio_shard_1() {
     local dir=$1
     local shard_id=0
 
-    setup_osds || return 1
+    setup_osds 4 || return 1
 
     local poolname=pool-jerasure
-    create_erasure_coded_pool $poolname || return 1
+    create_erasure_coded_pool $poolname 2 1 || return 1
     # inject eio on replica OSD (1)
     local shard_id=1
     rados_get_data_recovery eio $dir $shard_id || return 1
@@ -372,6 +380,34 @@ function TEST_rados_get_with_subreadall_eio_shard_1() {
     delete_pool $poolname
 }
 
+# Test recovery the first k copies aren't all available
+function TEST_ec_recovery_errors() {
+    local dir=$1
+    local objname=myobject
+
+    setup_osds 7 || return 1
+
+    local poolname=pool-jerasure
+    create_erasure_coded_pool $poolname 3 2 || return 1
+
+    rados_put $dir $poolname $objname || return 1
+    inject_eio ec data $poolname $objname $dir 0 || return 1
+
+    local -a initial_osds=($(get_osds $poolname $objname))
+    local last_osd=${initial_osds[-1]}
+    # Kill OSD
+    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+    ceph osd down ${last_osd} || return 1
+    ceph osd out ${last_osd} || return 1
+
+    # Cluster should recover this object
+    wait_for_clean || return 1
+
+    #rados_get_data_recovery eio $dir $shard_id || return 1
+
+    delete_pool $poolname
+}
+
 main test-erasure-eio "$@"
 
 # Local Variables:
@@ -1187,8 +1187,7 @@ void ECBackend::handle_sub_read_reply(
   unsigned is_complete = 0;
   // For redundant reads check for completion as each shard comes in,
   // or in a non-recovery read check for completion once all the shards read.
-  // TODO: It would be nice if recovery could send more reads too
-  if (rop.do_redundant_reads || (!rop.for_recovery && rop.in_progress.empty())) {
+  if (rop.do_redundant_reads || rop.in_progress.empty()) {
     for (map<hobject_t, read_result_t>::const_iterator iter =
            rop.complete.begin();
          iter != rop.complete.end();
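
The behavioral core of the hunk above is the dropped !rop.for_recovery guard: once all in-flight shard reads have replied, a recovery read now runs the same completion check as any other read, so an incomplete result can fall through to send_all_remaining_reads rather than stalling. A minimal sketch of the old and new predicates, with a hypothetical ReadOpState struct standing in for the ECBackend::ReadOp fields involved:

#include <set>

// Hypothetical stand-in for the ReadOp fields consulted here.
struct ReadOpState {
  bool do_redundant_reads;    // fast read: check completion on every reply
  bool for_recovery;          // read issued on behalf of recovery
  std::set<int> in_progress;  // shards we are still waiting on
};

// Old: recovery reads never reached the completion check, so they could not
// request additional shards after an error.
inline bool check_completion_old(const ReadOpState &rop) {
  return rop.do_redundant_reads ||
         (!rop.for_recovery && rop.in_progress.empty());
}

// New: recovery reads are evaluated once their in-flight reads return.
inline bool check_completion_new(const ReadOpState &rop) {
  return rop.do_redundant_reads || rop.in_progress.empty();
}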
@@ -1485,19 +1484,12 @@ void ECBackend::call_write_ordered(std::function<void(void)> &&cb) {
   }
 }
 
-int ECBackend::get_min_avail_to_read_shards(
+void ECBackend::get_all_avail_shards(
   const hobject_t &hoid,
-  const set<int> &want,
-  bool for_recovery,
-  bool do_redundant_reads,
-  set<pg_shard_t> *to_read)
+  set<int> &have,
+  map<shard_id_t, pg_shard_t> &shards,
+  bool for_recovery)
 {
-  // Make sure we don't do redundant reads for recovery
-  assert(!for_recovery || !do_redundant_reads);
-
-  set<int> have;
-  map<shard_id_t, pg_shard_t> shards;
-
   for (set<pg_shard_t>::const_iterator i =
          get_parent()->get_acting_shards().begin();
        i != get_parent()->get_acting_shards().end();
@@ -1548,6 +1540,22 @@ int ECBackend::get_min_avail_to_read_shards(
       }
     }
   }
+}
+
+int ECBackend::get_min_avail_to_read_shards(
+  const hobject_t &hoid,
+  const set<int> &want,
+  bool for_recovery,
+  bool do_redundant_reads,
+  set<pg_shard_t> *to_read)
+{
+  // Make sure we don't do redundant reads for recovery
+  assert(!for_recovery || !do_redundant_reads);
+
+  set<int> have;
+  map<shard_id_t, pg_shard_t> shards;
+
+  get_all_avail_shards(hoid, have, shards, for_recovery);
 
   set<int> need;
   int r = ec_impl->minimum_to_decode(want, have, &need);
@@ -1573,30 +1581,18 @@ int ECBackend::get_min_avail_to_read_shards(
 int ECBackend::get_remaining_shards(
   const hobject_t &hoid,
   const set<int> &avail,
-  set<pg_shard_t> *to_read)
+  set<pg_shard_t> *to_read,
+  bool for_recovery)
 {
-  set<int> need;
+  assert(to_read);
+
+  set<int> have;
   map<shard_id_t, pg_shard_t> shards;
 
-  for (set<pg_shard_t>::const_iterator i =
-         get_parent()->get_acting_shards().begin();
-       i != get_parent()->get_acting_shards().end();
-       ++i) {
-    dout(10) << __func__ << ": checking acting " << *i << dendl;
-    const pg_missing_t &missing = get_parent()->get_shard_missing(*i);
-    if (!missing.is_missing(hoid)) {
-      assert(!need.count(i->shard));
-      need.insert(i->shard);
-      assert(!shards.count(i->shard));
-      shards.insert(make_pair(i->shard, *i));
-    }
-  }
+  get_all_avail_shards(hoid, have, shards, for_recovery);
 
-  if (!to_read)
-    return 0;
-
-  for (set<int>::iterator i = need.begin();
-       i != need.end();
+  for (set<int>::iterator i = have.begin();
+       i != have.end();
        ++i) {
     assert(shards.count(shard_id_t(*i)));
     if (avail.find(*i) == avail.end())
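
With get_remaining_shards now delegating to get_all_avail_shards and taking for_recovery, the retry path works from the same notion of "currently readable shards" as the initial read planning. The selection it performs is conceptually a set difference; a small illustrative sketch (hypothetical names, not the ECBackend API):

#include <set>

// Shards worth asking for on retry: everything readable that we have not
// already read (or already seen fail).
inline std::set<int> shards_to_retry(const std::set<int> &have,
                                     const std::set<int> &already_read) {
  std::set<int> remaining;
  for (int shard : have)
    if (already_read.count(shard) == 0)
      remaining.insert(shard);
  return remaining;
}

// Example with the k=2 m=1 profile from the tests: shards {0, 1, 2} are
// readable, shard 0 returned EIO and shard 1 decoded fine; the retry asks
// for shard 2, and {1, 2} are enough to reconstruct the object.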
@@ -2317,7 +2313,7 @@ int ECBackend::send_all_remaining_reads(
     already_read.insert(i->shard);
   dout(10) << __func__ << " have/error shards=" << already_read << dendl;
   set<pg_shard_t> shards;
-  int r = get_remaining_shards(hoid, already_read, &shards);
+  int r = get_remaining_shards(hoid, already_read, &shards, rop.for_recovery);
   if (r)
     return r;
   if (shards.empty())
@@ -320,6 +320,11 @@ private:
     const PushReplyOp &op,
     pg_shard_t from,
     RecoveryMessages *m);
+  void get_all_avail_shards(
+    const hobject_t &hoid,
+    set<int> &have,
+    map<shard_id_t, pg_shard_t> &shards,
+    bool for_recovery);
 
 public:
   /**
@@ -650,7 +655,8 @@ public:
   int get_remaining_shards(
     const hobject_t &hoid,
     const set<int> &avail,
-    set<pg_shard_t> *to_read);
+    set<pg_shard_t> *to_read,
+    bool for_recovery);
 
   int objects_get_attrs(
     const hobject_t &hoid,