rbd-mirror: track in-flight start/stop/restart in instance replayer

The shut down waits for in-flight ops to complete but the
start/stop/restart operations were previously not tracked. This
could cause a potential race and crash between an image replayer
operation and the instance replayer shutting down.

Fixes: https://tracker.ceph.com/issues/45072
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
This commit is contained in:
Jason Dillaman 2020-04-17 11:17:05 -04:00
parent 64f8d9c30c
commit 31140a940e
2 changed files with 22 additions and 21 deletions

View File

@ -90,7 +90,7 @@ struct ImageReplayer<librbd::MockTestImageCtx> {
MOCK_METHOD0(destroy, void());
MOCK_METHOD2(start, void(Context *, bool));
MOCK_METHOD2(stop, void(Context *, bool));
MOCK_METHOD0(restart, void());
MOCK_METHOD1(restart, void(Context*));
MOCK_METHOD0(flush, void());
MOCK_METHOD1(print_status, void(Formatter *));
MOCK_METHOD1(add_peer, void(const Peer<librbd::MockTestImageCtx>& peer));
@ -201,7 +201,8 @@ TEST_F(TestMockInstanceReplayer, AcquireReleaseImage) {
EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
EXPECT_CALL(mock_image_replayer, start(nullptr, false));
EXPECT_CALL(mock_image_replayer, start(_, false))
.WillOnce(CompleteContext(0));
expect_work_queue(mock_threads);
instance_replayer.acquire_image(&mock_instance_watcher, global_image_id,
@ -271,7 +272,8 @@ TEST_F(TestMockInstanceReplayer, RemoveFinishedImage) {
EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
EXPECT_CALL(mock_image_replayer, start(nullptr, false));
EXPECT_CALL(mock_image_replayer, start(_, false))
.WillOnce(CompleteContext(0));
expect_work_queue(mock_threads);
instance_replayer.acquire_image(&mock_instance_watcher, global_image_id,
@ -344,7 +346,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) {
EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
EXPECT_CALL(mock_image_replayer, start(nullptr, false));
EXPECT_CALL(mock_image_replayer, start(_, false))
.WillOnce(CompleteContext(0));
expect_work_queue(mock_threads);
C_SaferCond on_acquire1;
@ -354,7 +357,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) {
// Re-acquire
EXPECT_CALL(mock_image_replayer, set_finished(false));
EXPECT_CALL(mock_image_replayer, restart());
EXPECT_CALL(mock_image_replayer, restart(_))
.WillOnce(CompleteContext(0));
expect_work_queue(mock_threads);
C_SaferCond on_acquire2;

View File

@ -175,7 +175,7 @@ void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher,
// detect if the image has been deleted while the leader was offline
auto& image_replayer = it->second;
image_replayer->set_finished(false);
image_replayer->restart();
image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
}
m_threads->work_queue->queue(on_finish, 0);
@ -224,7 +224,7 @@ void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id,
// it will eventually detect that the peer image is missing and
// determine if a delete propagation is required.
auto image_replayer = it->second;
image_replayer->restart();
image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
}
m_threads->work_queue->queue(on_finish, 0);
}
@ -252,25 +252,21 @@ void InstanceReplayer<I>::start()
m_manual_stop = false;
auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
auto gather_ctx = new C_Gather(
cct, new C_TrackedOp(m_async_op_tracker, nullptr));
for (auto &kv : m_image_replayers) {
auto &image_replayer = kv.second;
image_replayer->start(nullptr, true);
image_replayer->start(gather_ctx->new_sub(), true);
}
gather_ctx->activate();
}
template <typename I>
void InstanceReplayer<I>::stop()
{
dout(10) << dendl;
std::lock_guard locker{m_lock};
m_manual_stop = true;
for (auto &kv : m_image_replayers) {
auto &image_replayer = kv.second;
image_replayer->stop(nullptr, true);
}
stop(nullptr);
}
template <typename I>
@ -279,7 +275,8 @@ void InstanceReplayer<I>::stop(Context *on_finish)
dout(10) << dendl;
auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
auto gather_ctx = new C_Gather(cct, on_finish);
auto gather_ctx = new C_Gather(
cct, new C_TrackedOp(m_async_op_tracker, on_finish));
{
std::lock_guard locker{m_lock};
@ -305,7 +302,7 @@ void InstanceReplayer<I>::restart()
for (auto &kv : m_image_replayers) {
auto &image_replayer = kv.second;
image_replayer->restart();
image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
}
}
@ -347,7 +344,7 @@ void InstanceReplayer<I>::start_image_replayer(
}
dout(10) << "global_image_id=" << global_image_id << dendl;
image_replayer->start(nullptr, false);
image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false);
}
template <typename I>