From d2ca3d2feb442f97ca89023c7d01178d96f517a6 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 14 Mar 2019 16:29:50 -0700 Subject: [PATCH] osd: Track num_objects_repaired in pg stats 2(3) Leave repair pg state on until recovery finishes or a new scrub starts Fixes: http://tracker.ceph.com/issues/38616 Signed-off-by: David Zafman --- qa/standalone/osd/osd-rep-recov-eio.sh | 12 ++++++ qa/standalone/scrub/osd-scrub-repair.sh | 57 +++++++++++++++++++++++++ src/osd/ECBackend.cc | 2 + src/osd/PG.cc | 12 ++++-- src/osd/PG.h | 3 +- src/osd/PGBackend.h | 1 + src/osd/PrimaryLogPG.cc | 3 ++ src/osd/PrimaryLogPG.h | 4 ++ src/osd/ReplicatedBackend.cc | 8 +++- src/osd/osd_types.cc | 15 +++++-- src/osd/osd_types.h | 4 ++ 11 files changed, 113 insertions(+), 8 deletions(-) diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh index de35bc18b07..6b501bc875c 100755 --- a/qa/standalone/osd/osd-rep-recov-eio.sh +++ b/qa/standalone/osd/osd-rep-recov-eio.sh @@ -110,18 +110,30 @@ function rados_get_data() { local poolname=pool-rep local objname=obj-$inject-$$ + local pgid=$(get_pg $poolname $objname) + rados_put $dir $poolname $objname || return 1 inject_$inject rep data $poolname $objname $dir 0 || return 1 rados_get $dir $poolname $objname || return 1 + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "1" || return 1 + inject_$inject rep data $poolname $objname $dir 0 || return 1 inject_$inject rep data $poolname $objname $dir 1 || return 1 rados_get $dir $poolname $objname || return 1 + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "2" || return 1 + inject_$inject rep data $poolname $objname $dir 0 || return 1 inject_$inject rep data $poolname $objname $dir 1 || return 1 inject_$inject rep data $poolname $objname $dir 2 || return 1 rados_get $dir $poolname $objname hang || return 1 + + # After hang another repair couldn't happen, so count stays the same + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "2" || return 1 } function TEST_rados_get_with_eio() { diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index b326df8c38a..2af971f1576 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -364,6 +364,10 @@ function TEST_auto_repair_bluestore_scrub() { diff $dir/ORIGINAL $dir/COPY || return 1 grep scrub_finish $dir/osd.${primary}.log + # This should have caused 1 object to be repaired + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "1" || return 1 + # Tear down teardown $dir || return 1 } @@ -491,6 +495,59 @@ function TEST_auto_repair_bluestore_failed_norecov() { teardown $dir || return 1 } +function TEST_repair_stats() { + local dir=$1 + local poolname=testpool + local OBJS=30 + local REPAIRS=20 + + # Launch a cluster with 5 seconds scrub interval + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd_bluestore $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 $OBJS) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + local other=$(get_not_primary $poolname obj1) + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + + kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1 + for i in $(seq 1 $REPAIRS) + do + _objectstore_tool_nodown $dir $other obj$i remove || return 1 + done + run_osd_bluestore $dir $other $ceph_osd_args || return 1 + + repair $pgid + wait_for_clean || return 1 + ceph pg dump pgs + + # This should have caused 1 object to be repaired + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$REPAIRS" || return 1 + + # Tear down + teardown $dir || return 1 +} + function corrupt_and_repair_jerasure() { local dir=$1 local allow_overwrites=$2 diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index c20e10a25ac..dfa1f4276b2 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -682,6 +682,8 @@ void ECBackend::continue_recovery_op( stat.num_bytes_recovered = op.recovery_info.size; stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ? stat.num_objects_recovered = 1; + if (get_parent()->pg_is_repair()) + stat.num_objects_repaired = 1; get_parent()->on_global_recover(op.hoid, stat, false); dout(10) << __func__ << ": WRITING return " << op << dendl; recovery_ops.erase(op.hoid); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 35c53427a70..ff724a8dd18 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2335,6 +2335,8 @@ bool PG::queue_scrub() if (is_scrubbing()) { return false; } + // An interrupted recovery repair could leave this set. + state_clear(PG_STATE_REPAIR); scrubber.priority = scrubber.must_scrub ? cct->_conf->osd_requested_scrub_priority : get_scrub_priority(); scrubber.must_scrub = false; @@ -2544,6 +2546,8 @@ Context *PG::finish_recovery() void PG::_finish_recovery(Context *c) { lock(); + // When recovery is initiated by a repair, that flag is left on + state_clear(PG_STATE_REPAIR); if (deleting) { unlock(); return; @@ -5545,11 +5549,12 @@ bool PG::range_intersects_scrub(const hobject_t &start, const hobject_t& end) end >= scrubber.start); } -void PG::scrub_clear_state() +void PG::scrub_clear_state(bool has_error) { ceph_assert(is_locked()); state_clear(PG_STATE_SCRUBBING); - state_clear(PG_STATE_REPAIR); + if (!has_error) + state_clear(PG_STATE_REPAIR); state_clear(PG_STATE_DEEP_SCRUB); publish_stats_to_osd(); @@ -5867,7 +5872,7 @@ void PG::scrub_finish() DoRecovery()))); } - scrub_clear_state(); + scrub_clear_state(has_error); scrub_unreserve_replicas(); if (is_active() && is_primary()) { @@ -7694,6 +7699,7 @@ PG::RecoveryState::NotBackfilling::NotBackfilling(my_context ctx) { context< RecoveryMachine >().log_enter(state_name); PG *pg = context< RecoveryMachine >().pg; + pg->state_clear(PG_STATE_REPAIR); pg->publish_stats_to_osd(); } diff --git a/src/osd/PG.h b/src/osd/PG.h index 7ea67cad2d7..d64c9ed71a9 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1879,7 +1879,7 @@ protected: bool scrub_process_inconsistent(); bool ops_blocked_by_scrub() const; void scrub_finish(); - void scrub_clear_state(); + void scrub_clear_state(bool keep_repair = false); void _scan_snaps(ScrubMap &map); void _repair_oinfo_oid(ScrubMap &map); void _scan_rollback_obs(const vector &rollback_obs); @@ -2957,6 +2957,7 @@ protected: } bool is_recovering() const { return state_test(PG_STATE_RECOVERING); } bool is_premerge() const { return state_test(PG_STATE_PREMERGE); } + bool is_repair() const { return state_test(PG_STATE_REPAIR); } bool is_empty() const { return info.last_update == eversion_t(0,0); } diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 6fdf6dd05b5..75f895b1b39 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -227,6 +227,7 @@ typedef std::shared_ptr OSDMapRef; const hobject_t &hoid) = 0; virtual bool pg_is_undersized() const = 0; + virtual bool pg_is_repair() const = 0; virtual void log_operation( const vector &logv, diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 51713b29414..8d83d81d054 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -11493,6 +11493,8 @@ int PrimaryLogPG::recover_missing( if (!object_missing) { object_stat_sum_t stat_diff; stat_diff.num_objects_recovered = 1; + if (scrub_after_recovery) + stat_diff.num_objects_repaired = 1; on_global_recover(soid, stat_diff, true); } else { auto recovery_handle = pgbackend->open_recovery_op(); @@ -15230,6 +15232,7 @@ int PrimaryLogPG::rep_repair_primary_object(const hobject_t& soid, OpContext *ct if (!eio_errors_to_process) { eio_errors_to_process = true; ceph_assert(is_clean()); + state_set(PG_STATE_REPAIR); queue_peering_event( PGPeeringEventRef( std::make_shared( diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 1fdc11d4b11..e5f47b6a6c8 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -454,6 +454,10 @@ public: return is_undersized(); } + bool pg_is_repair() const override { + return is_repair(); + } + void update_peer_last_complete_ondisk( pg_shard_t fromosd, eversion_t lcod) override { diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index d471e6b7a09..2602bbd24e3 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -1725,6 +1725,9 @@ bool ReplicatedBackend::handle_pull_response( if (complete) { pi.stat.num_objects_recovered++; + // XXX: This could overcount if regular recovery is needed right after a repair + if (get_parent()->pg_is_repair()) + pi.stat.num_objects_repaired++; clear_pull_from(piter); to_continue->push_back({hoid, pi.stat}); get_parent()->on_local_recover( @@ -1996,8 +1999,11 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info, if (new_progress.is_complete(recovery_info)) { new_progress.data_complete = true; - if (stat) + if (stat) { stat->num_objects_recovered++; + if (get_parent()->pg_is_repair()) + stat->num_objects_repaired++; + } } if (stat) { diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index ad839ad911f..3830167e248 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2229,11 +2229,12 @@ void object_stat_sum_t::dump(Formatter *f) const f->dump_int("num_objects_manifest", num_objects_manifest); f->dump_int("num_omap_bytes", num_omap_bytes); f->dump_int("num_omap_keys", num_omap_keys); + f->dump_int("num_objects_repaired", num_objects_repaired); } void object_stat_sum_t::encode(bufferlist& bl) const { - ENCODE_START(19, 14, bl); + ENCODE_START(20, 14, bl); #if defined(CEPH_LITTLE_ENDIAN) bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t)); #else @@ -2276,6 +2277,7 @@ void object_stat_sum_t::encode(bufferlist& bl) const encode(num_objects_manifest, bl); encode(num_omap_bytes, bl); encode(num_omap_keys, bl); + encode(num_objects_repaired, bl); #endif ENCODE_FINISH(bl); } @@ -2283,7 +2285,7 @@ void object_stat_sum_t::encode(bufferlist& bl) const void object_stat_sum_t::decode(bufferlist::const_iterator& bl) { bool decode_finish = false; - DECODE_START(19, bl); // make sure to also update fast decode below + DECODE_START(20, bl); // make sure to also update fast decode below #if defined(CEPH_LITTLE_ENDIAN) if (struct_v >= 19) { // this must match newest decode version bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes)); @@ -2340,6 +2342,9 @@ void object_stat_sum_t::decode(bufferlist::const_iterator& bl) decode(num_omap_bytes, bl); decode(num_omap_keys, bl); } + if (struct_v >= 20) { + decode(num_objects_repaired, bl); + } } DECODE_FINISH(bl); } @@ -2383,6 +2388,7 @@ void object_stat_sum_t::generate_test_instances(list& o) a.num_objects_manifest = 2; a.num_omap_bytes = 20000; a.num_omap_keys = 200; + a.num_objects_repaired = 300; o.push_back(new object_stat_sum_t(a)); } @@ -2427,6 +2433,7 @@ void object_stat_sum_t::add(const object_stat_sum_t& o) num_objects_manifest += o.num_objects_manifest; num_omap_bytes += o.num_omap_bytes; num_omap_keys += o.num_omap_keys; + num_objects_repaired += o.num_objects_repaired; } void object_stat_sum_t::sub(const object_stat_sum_t& o) @@ -2470,6 +2477,7 @@ void object_stat_sum_t::sub(const object_stat_sum_t& o) num_objects_manifest -= o.num_objects_manifest; num_omap_bytes -= o.num_omap_bytes; num_omap_keys -= o.num_omap_keys; + num_objects_repaired -= o.num_objects_repaired; } bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r) @@ -2513,7 +2521,8 @@ bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r) l.num_large_omap_objects == r.num_large_omap_objects && l.num_objects_manifest == r.num_objects_manifest && l.num_omap_bytes == r.num_omap_bytes && - l.num_omap_keys == r.num_omap_keys; + l.num_omap_keys == r.num_omap_keys && + l.num_objects_repaired == r.num_objects_repaired; } // -- object_stat_collection_t -- diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 62eb069db3b..fbcc71ba347 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1773,6 +1773,7 @@ struct object_stat_sum_t { int64_t num_objects_manifest = 0; int64_t num_omap_bytes = 0; int64_t num_omap_keys = 0; + int64_t num_objects_repaired = 0; object_stat_sum_t() : num_bytes(0), @@ -1845,6 +1846,7 @@ struct object_stat_sum_t { FLOOR(num_evict_mode_full); FLOOR(num_objects_pinned); FLOOR(num_legacy_snapsets); + FLOOR(num_objects_repaired); #undef FLOOR } @@ -1881,6 +1883,7 @@ struct object_stat_sum_t { SPLIT(num_objects_manifest); SPLIT(num_omap_bytes); SPLIT(num_omap_keys); + SPLIT(num_objects_repaired); SPLIT_PRESERVE_NONZERO(num_shallow_scrub_errors); SPLIT_PRESERVE_NONZERO(num_deep_scrub_errors); for (unsigned i = 0; i < out.size(); ++i) { @@ -1945,6 +1948,7 @@ struct object_stat_sum_t { sizeof(num_objects_manifest) + sizeof(num_omap_bytes) + sizeof(num_omap_keys) + + sizeof(num_objects_repaired) + sizeof(num_objects_recovered) + sizeof(num_bytes_recovered) + sizeof(num_keys_recovered) +