From 31140a940ea1909c4b5d68ef4593cb582a527354 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Fri, 17 Apr 2020 11:17:05 -0400 Subject: [PATCH] rbd-mirror: track in-flight start/stop/restart in instance replayer The shut down waits for in-flight ops to complete but the start/stop/restart operations were previously not tracked. This could cause a potential race and crash between an image replayer operation and the instance replayer shutting down. Fixes: https://tracker.ceph.com/issues/45072 Signed-off-by: Jason Dillaman --- .../rbd_mirror/test_mock_InstanceReplayer.cc | 14 +++++---- src/tools/rbd_mirror/InstanceReplayer.cc | 29 +++++++++---------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/test/rbd_mirror/test_mock_InstanceReplayer.cc b/src/test/rbd_mirror/test_mock_InstanceReplayer.cc index 574e3d5507a..d2462c1f29c 100644 --- a/src/test/rbd_mirror/test_mock_InstanceReplayer.cc +++ b/src/test/rbd_mirror/test_mock_InstanceReplayer.cc @@ -90,7 +90,7 @@ struct ImageReplayer { MOCK_METHOD0(destroy, void()); MOCK_METHOD2(start, void(Context *, bool)); MOCK_METHOD2(stop, void(Context *, bool)); - MOCK_METHOD0(restart, void()); + MOCK_METHOD1(restart, void(Context*)); MOCK_METHOD0(flush, void()); MOCK_METHOD1(print_status, void(Formatter *)); MOCK_METHOD1(add_peer, void(const Peer& peer)); @@ -201,7 +201,8 @@ TEST_F(TestMockInstanceReplayer, AcquireReleaseImage) { EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true)); EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false)); EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false)); - EXPECT_CALL(mock_image_replayer, start(nullptr, false)); + EXPECT_CALL(mock_image_replayer, start(_, false)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); instance_replayer.acquire_image(&mock_instance_watcher, global_image_id, @@ -271,7 +272,8 @@ TEST_F(TestMockInstanceReplayer, RemoveFinishedImage) { EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true)); EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false)); EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false)); - EXPECT_CALL(mock_image_replayer, start(nullptr, false)); + EXPECT_CALL(mock_image_replayer, start(_, false)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); instance_replayer.acquire_image(&mock_instance_watcher, global_image_id, @@ -344,7 +346,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) { EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true)); EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false)); EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false)); - EXPECT_CALL(mock_image_replayer, start(nullptr, false)); + EXPECT_CALL(mock_image_replayer, start(_, false)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); C_SaferCond on_acquire1; @@ -354,7 +357,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) { // Re-acquire EXPECT_CALL(mock_image_replayer, set_finished(false)); - EXPECT_CALL(mock_image_replayer, restart()); + EXPECT_CALL(mock_image_replayer, restart(_)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); C_SaferCond on_acquire2; diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc index 9a8b50596c6..95bb67129dd 100644 --- a/src/tools/rbd_mirror/InstanceReplayer.cc +++ b/src/tools/rbd_mirror/InstanceReplayer.cc @@ -175,7 +175,7 @@ void InstanceReplayer::acquire_image(InstanceWatcher *instance_watcher, // detect if the image has been deleted while the leader was offline auto& image_replayer = it->second; image_replayer->set_finished(false); - image_replayer->restart(); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); } m_threads->work_queue->queue(on_finish, 0); @@ -224,7 +224,7 @@ void InstanceReplayer::remove_peer_image(const std::string &global_image_id, // it will eventually detect that the peer image is missing and // determine if a delete propagation is required. auto image_replayer = it->second; - image_replayer->restart(); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); } m_threads->work_queue->queue(on_finish, 0); } @@ -252,25 +252,21 @@ void InstanceReplayer::start() m_manual_stop = false; + auto cct = static_cast(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather( + cct, new C_TrackedOp(m_async_op_tracker, nullptr)); for (auto &kv : m_image_replayers) { auto &image_replayer = kv.second; - image_replayer->start(nullptr, true); + image_replayer->start(gather_ctx->new_sub(), true); } + + gather_ctx->activate(); } template void InstanceReplayer::stop() { - dout(10) << dendl; - - std::lock_guard locker{m_lock}; - - m_manual_stop = true; - - for (auto &kv : m_image_replayers) { - auto &image_replayer = kv.second; - image_replayer->stop(nullptr, true); - } + stop(nullptr); } template @@ -279,7 +275,8 @@ void InstanceReplayer::stop(Context *on_finish) dout(10) << dendl; auto cct = static_cast(m_local_io_ctx.cct()); - auto gather_ctx = new C_Gather(cct, on_finish); + auto gather_ctx = new C_Gather( + cct, new C_TrackedOp(m_async_op_tracker, on_finish)); { std::lock_guard locker{m_lock}; @@ -305,7 +302,7 @@ void InstanceReplayer::restart() for (auto &kv : m_image_replayers) { auto &image_replayer = kv.second; - image_replayer->restart(); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); } } @@ -347,7 +344,7 @@ void InstanceReplayer::start_image_replayer( } dout(10) << "global_image_id=" << global_image_id << dendl; - image_replayer->start(nullptr, false); + image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false); } template