From 10edb2ffbdb7c4ef839f1ba7b88d7d85a682b7be Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Tue, 22 Aug 2023 14:10:46 +0800 Subject: [PATCH] crimson/osd/pg_recovery: avoiding duplicated object recovering UrgentRecovery and other recoveries may collide when doing `PGRecovery::add_recovering`, this is not an error. We should allow this to happen Signed-off-by: Xuehan Xu --- src/crimson/osd/pg_recovery.cc | 91 +++++++++++++++++++----------- src/crimson/osd/recovery_backend.h | 6 +- 2 files changed, 60 insertions(+), 37 deletions(-) diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index 09b45779ec8..efbbf7e4f3e 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -266,20 +266,27 @@ PGRecovery::recover_missing( RecoveryBackend::RecoveryBlockingEvent::TriggerI& trigger, const hobject_t &soid, eversion_t need) { - if (pg->get_peering_state().get_missing_loc().is_deleted(soid)) { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->recover_delete(soid, need)); + logger().info("{} {} v {}", __func__, soid, need); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid); + if (added) { + logger().info("{} {} v {}, new recovery", __func__, soid, need); + if (pg->get_peering_state().get_missing_loc().is_deleted(soid)) { + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->recover_delete(soid, need)); + } else { + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->recover_object(soid, need) + .handle_exception_interruptible( + [=, this, soid = std::move(soid)] (auto e) { + on_failed_recover({ pg->get_pg_whoami() }, soid, need); + return seastar::make_ready_future<>(); + }) + ); + } } else { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->recover_object(soid, need) - .handle_exception_interruptible( - [=, this, soid = std::move(soid)] (auto e) { - on_failed_recover({ pg->get_pg_whoami() }, soid, need); - return seastar::make_ready_future<>(); - }) - ); + return recovering.wait_for_recovered(); } } @@ -288,16 +295,23 @@ RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_deletes( const hobject_t& soid, eversion_t need) { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->push_delete(soid, need).then_interruptible( - [=, this] { - object_stat_sum_t stat_diff; - stat_diff.num_objects_recovered = 1; - on_global_recover(soid, stat_diff, true); - return seastar::make_ready_future<>(); - }) - ); + logger().info("{} {} v {}", __func__, soid, need); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid); + if (added) { + logger().info("{} {} v {}, new recovery", __func__, soid, need); + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->push_delete(soid, need).then_interruptible( + [=, this] { + object_stat_sum_t stat_diff; + stat_diff.num_objects_recovered = 1; + on_global_recover(soid, stat_diff, true); + return seastar::make_ready_future<>(); + }) + ); + } else { + return recovering.wait_for_recovered(); + } } RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_pushes( @@ -305,15 +319,22 @@ RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_pushes( const hobject_t& soid, eversion_t need) { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->recover_object(soid, need) - .handle_exception_interruptible( - [=, this, soid = std::move(soid)] (auto e) { - on_failed_recover({ pg->get_pg_whoami() }, soid, need); - return seastar::make_ready_future<>(); - }) - ); + logger().info("{} {} v {}", __func__, soid, need); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid); + if (added) { + logger().info("{} {} v {}, new recovery", __func__, soid, need); + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->recover_object(soid, need) + .handle_exception_interruptible( + [=, this, soid = std::move(soid)] (auto e) { + on_failed_recover({ pg->get_pg_whoami() }, soid, need); + return seastar::make_ready_future<>(); + }) + ); + } else { + return recovering.wait_for_recovered(); + } } void PGRecovery::on_local_recover( @@ -449,9 +470,11 @@ void PGRecovery::enqueue_push( const hobject_t& obj, const eversion_t& v) { - logger().debug("{}: obj={} v={}", + logger().info("{}: obj={} v={}", __func__, obj, v); - pg->get_recovery_backend()->add_recovering(obj); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(obj); + if (!added) + return; std::ignore = pg->get_recovery_backend()->recover_object(obj, v).\ handle_exception_interruptible([] (auto) { ceph_abort_msg("got exception on backfill's push"); diff --git a/src/crimson/osd/recovery_backend.h b/src/crimson/osd/recovery_backend.h index 65e9bb01fbd..abf69589159 100644 --- a/src/crimson/osd/recovery_backend.h +++ b/src/crimson/osd/recovery_backend.h @@ -45,10 +45,10 @@ public: coll{coll}, backend{backend} {} virtual ~RecoveryBackend() {} - WaitForObjectRecovery& add_recovering(const hobject_t& soid) { + std::pair add_recovering(const hobject_t& soid) { auto [it, added] = recovering.emplace(soid, new WaitForObjectRecovery{}); - assert(added); - return *(it->second); + assert(it->second); + return {*(it->second), added}; } WaitForObjectRecovery& get_recovering(const hobject_t& soid) { assert(is_recovering(soid));