osd: Track num_objects_repaired in pg stats 2(3)

Leave repair pg state on until recovery finishes or a new scrub starts

Fixes: http://tracker.ceph.com/issues/38616

Signed-off-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
David Zafman 2019-03-14 16:29:50 -07:00
parent 2202e5d0b1
commit d2ca3d2feb
11 changed files with 113 additions and 8 deletions

View File

@ -110,18 +110,30 @@ function rados_get_data() {
local poolname=pool-rep
local objname=obj-$inject-$$
local pgid=$(get_pg $poolname $objname)
rados_put $dir $poolname $objname || return 1
inject_$inject rep data $poolname $objname $dir 0 || return 1
rados_get $dir $poolname $objname || return 1
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "1" || return 1
inject_$inject rep data $poolname $objname $dir 0 || return 1
inject_$inject rep data $poolname $objname $dir 1 || return 1
rados_get $dir $poolname $objname || return 1
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "2" || return 1
inject_$inject rep data $poolname $objname $dir 0 || return 1
inject_$inject rep data $poolname $objname $dir 1 || return 1
inject_$inject rep data $poolname $objname $dir 2 || return 1
rados_get $dir $poolname $objname hang || return 1
# After hang another repair couldn't happen, so count stays the same
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "2" || return 1
}
function TEST_rados_get_with_eio() {

View File

@ -364,6 +364,10 @@ function TEST_auto_repair_bluestore_scrub() {
diff $dir/ORIGINAL $dir/COPY || return 1
grep scrub_finish $dir/osd.${primary}.log
# This should have caused 1 object to be repaired
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "1" || return 1
# Tear down
teardown $dir || return 1
}
@ -491,6 +495,59 @@ function TEST_auto_repair_bluestore_failed_norecov() {
teardown $dir || return 1
}
function TEST_repair_stats() {
local dir=$1
local poolname=testpool
local OBJS=30
local REPAIRS=20
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd_bluestore $dir $id $ceph_osd_args || return 1
done
create_pool $poolname 1 1 || return 1
ceph osd pool set $poolname size 2
wait_for_clean || return 1
# Put an object
local payload=ABCDEF
echo $payload > $dir/ORIGINAL
for i in $(seq 1 $OBJS)
do
rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
done
# Remove the object from one shard physically
# Restarted osd get $ceph_osd_args passed
local other=$(get_not_primary $poolname obj1)
local pgid=$(get_pg $poolname obj1)
local primary=$(get_primary $poolname obj1)
kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
for i in $(seq 1 $REPAIRS)
do
_objectstore_tool_nodown $dir $other obj$i remove || return 1
done
run_osd_bluestore $dir $other $ceph_osd_args || return 1
repair $pgid
wait_for_clean || return 1
ceph pg dump pgs
# This should have caused 1 object to be repaired
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "$REPAIRS" || return 1
# Tear down
teardown $dir || return 1
}
function corrupt_and_repair_jerasure() {
local dir=$1
local allow_overwrites=$2

View File

@ -682,6 +682,8 @@ void ECBackend::continue_recovery_op(
stat.num_bytes_recovered = op.recovery_info.size;
stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ?
stat.num_objects_recovered = 1;
if (get_parent()->pg_is_repair())
stat.num_objects_repaired = 1;
get_parent()->on_global_recover(op.hoid, stat, false);
dout(10) << __func__ << ": WRITING return " << op << dendl;
recovery_ops.erase(op.hoid);

View File

@ -2335,6 +2335,8 @@ bool PG::queue_scrub()
if (is_scrubbing()) {
return false;
}
// An interrupted recovery repair could leave this set.
state_clear(PG_STATE_REPAIR);
scrubber.priority = scrubber.must_scrub ?
cct->_conf->osd_requested_scrub_priority : get_scrub_priority();
scrubber.must_scrub = false;
@ -2544,6 +2546,8 @@ Context *PG::finish_recovery()
void PG::_finish_recovery(Context *c)
{
lock();
// When recovery is initiated by a repair, that flag is left on
state_clear(PG_STATE_REPAIR);
if (deleting) {
unlock();
return;
@ -5545,11 +5549,12 @@ bool PG::range_intersects_scrub(const hobject_t &start, const hobject_t& end)
end >= scrubber.start);
}
void PG::scrub_clear_state()
void PG::scrub_clear_state(bool has_error)
{
ceph_assert(is_locked());
state_clear(PG_STATE_SCRUBBING);
state_clear(PG_STATE_REPAIR);
if (!has_error)
state_clear(PG_STATE_REPAIR);
state_clear(PG_STATE_DEEP_SCRUB);
publish_stats_to_osd();
@ -5867,7 +5872,7 @@ void PG::scrub_finish()
DoRecovery())));
}
scrub_clear_state();
scrub_clear_state(has_error);
scrub_unreserve_replicas();
if (is_active() && is_primary()) {
@ -7694,6 +7699,7 @@ PG::RecoveryState::NotBackfilling::NotBackfilling(my_context ctx)
{
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
pg->state_clear(PG_STATE_REPAIR);
pg->publish_stats_to_osd();
}

View File

@ -1879,7 +1879,7 @@ protected:
bool scrub_process_inconsistent();
bool ops_blocked_by_scrub() const;
void scrub_finish();
void scrub_clear_state();
void scrub_clear_state(bool keep_repair = false);
void _scan_snaps(ScrubMap &map);
void _repair_oinfo_oid(ScrubMap &map);
void _scan_rollback_obs(const vector<ghobject_t> &rollback_obs);
@ -2957,6 +2957,7 @@ protected:
}
bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }
bool is_premerge() const { return state_test(PG_STATE_PREMERGE); }
bool is_repair() const { return state_test(PG_STATE_REPAIR); }
bool is_empty() const { return info.last_update == eversion_t(0,0); }

View File

@ -227,6 +227,7 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
const hobject_t &hoid) = 0;
virtual bool pg_is_undersized() const = 0;
virtual bool pg_is_repair() const = 0;
virtual void log_operation(
const vector<pg_log_entry_t> &logv,

View File

@ -11493,6 +11493,8 @@ int PrimaryLogPG::recover_missing(
if (!object_missing) {
object_stat_sum_t stat_diff;
stat_diff.num_objects_recovered = 1;
if (scrub_after_recovery)
stat_diff.num_objects_repaired = 1;
on_global_recover(soid, stat_diff, true);
} else {
auto recovery_handle = pgbackend->open_recovery_op();
@ -15230,6 +15232,7 @@ int PrimaryLogPG::rep_repair_primary_object(const hobject_t& soid, OpContext *ct
if (!eio_errors_to_process) {
eio_errors_to_process = true;
ceph_assert(is_clean());
state_set(PG_STATE_REPAIR);
queue_peering_event(
PGPeeringEventRef(
std::make_shared<PGPeeringEvent>(

View File

@ -454,6 +454,10 @@ public:
return is_undersized();
}
bool pg_is_repair() const override {
return is_repair();
}
void update_peer_last_complete_ondisk(
pg_shard_t fromosd,
eversion_t lcod) override {

View File

@ -1725,6 +1725,9 @@ bool ReplicatedBackend::handle_pull_response(
if (complete) {
pi.stat.num_objects_recovered++;
// XXX: This could overcount if regular recovery is needed right after a repair
if (get_parent()->pg_is_repair())
pi.stat.num_objects_repaired++;
clear_pull_from(piter);
to_continue->push_back({hoid, pi.stat});
get_parent()->on_local_recover(
@ -1996,8 +1999,11 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
if (new_progress.is_complete(recovery_info)) {
new_progress.data_complete = true;
if (stat)
if (stat) {
stat->num_objects_recovered++;
if (get_parent()->pg_is_repair())
stat->num_objects_repaired++;
}
}
if (stat) {

View File

@ -2229,11 +2229,12 @@ void object_stat_sum_t::dump(Formatter *f) const
f->dump_int("num_objects_manifest", num_objects_manifest);
f->dump_int("num_omap_bytes", num_omap_bytes);
f->dump_int("num_omap_keys", num_omap_keys);
f->dump_int("num_objects_repaired", num_objects_repaired);
}
void object_stat_sum_t::encode(bufferlist& bl) const
{
ENCODE_START(19, 14, bl);
ENCODE_START(20, 14, bl);
#if defined(CEPH_LITTLE_ENDIAN)
bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
#else
@ -2276,6 +2277,7 @@ void object_stat_sum_t::encode(bufferlist& bl) const
encode(num_objects_manifest, bl);
encode(num_omap_bytes, bl);
encode(num_omap_keys, bl);
encode(num_objects_repaired, bl);
#endif
ENCODE_FINISH(bl);
}
@ -2283,7 +2285,7 @@ void object_stat_sum_t::encode(bufferlist& bl) const
void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
{
bool decode_finish = false;
DECODE_START(19, bl); // make sure to also update fast decode below
DECODE_START(20, bl); // make sure to also update fast decode below
#if defined(CEPH_LITTLE_ENDIAN)
if (struct_v >= 19) { // this must match newest decode version
bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
@ -2340,6 +2342,9 @@ void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
decode(num_omap_bytes, bl);
decode(num_omap_keys, bl);
}
if (struct_v >= 20) {
decode(num_objects_repaired, bl);
}
}
DECODE_FINISH(bl);
}
@ -2383,6 +2388,7 @@ void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
a.num_objects_manifest = 2;
a.num_omap_bytes = 20000;
a.num_omap_keys = 200;
a.num_objects_repaired = 300;
o.push_back(new object_stat_sum_t(a));
}
@ -2427,6 +2433,7 @@ void object_stat_sum_t::add(const object_stat_sum_t& o)
num_objects_manifest += o.num_objects_manifest;
num_omap_bytes += o.num_omap_bytes;
num_omap_keys += o.num_omap_keys;
num_objects_repaired += o.num_objects_repaired;
}
void object_stat_sum_t::sub(const object_stat_sum_t& o)
@ -2470,6 +2477,7 @@ void object_stat_sum_t::sub(const object_stat_sum_t& o)
num_objects_manifest -= o.num_objects_manifest;
num_omap_bytes -= o.num_omap_bytes;
num_omap_keys -= o.num_omap_keys;
num_objects_repaired -= o.num_objects_repaired;
}
bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
@ -2513,7 +2521,8 @@ bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
l.num_large_omap_objects == r.num_large_omap_objects &&
l.num_objects_manifest == r.num_objects_manifest &&
l.num_omap_bytes == r.num_omap_bytes &&
l.num_omap_keys == r.num_omap_keys;
l.num_omap_keys == r.num_omap_keys &&
l.num_objects_repaired == r.num_objects_repaired;
}
// -- object_stat_collection_t --

View File

@ -1773,6 +1773,7 @@ struct object_stat_sum_t {
int64_t num_objects_manifest = 0;
int64_t num_omap_bytes = 0;
int64_t num_omap_keys = 0;
int64_t num_objects_repaired = 0;
object_stat_sum_t()
: num_bytes(0),
@ -1845,6 +1846,7 @@ struct object_stat_sum_t {
FLOOR(num_evict_mode_full);
FLOOR(num_objects_pinned);
FLOOR(num_legacy_snapsets);
FLOOR(num_objects_repaired);
#undef FLOOR
}
@ -1881,6 +1883,7 @@ struct object_stat_sum_t {
SPLIT(num_objects_manifest);
SPLIT(num_omap_bytes);
SPLIT(num_omap_keys);
SPLIT(num_objects_repaired);
SPLIT_PRESERVE_NONZERO(num_shallow_scrub_errors);
SPLIT_PRESERVE_NONZERO(num_deep_scrub_errors);
for (unsigned i = 0; i < out.size(); ++i) {
@ -1945,6 +1948,7 @@ struct object_stat_sum_t {
sizeof(num_objects_manifest) +
sizeof(num_omap_bytes) +
sizeof(num_omap_keys) +
sizeof(num_objects_repaired) +
sizeof(num_objects_recovered) +
sizeof(num_bytes_recovered) +
sizeof(num_keys_recovered) +