librbd: retry ENOENT in V2_REFRESH_PARENT as well

With auto-deletion of trashed snapshots, it is relatively easy to lose
a race to "rbd flatten" as follows:

- when V2_GET_PARENT runs, the image is technically still a clone
- when V2_REFRESH_PARENT runs, the image is fully flattened and the
  snapshot in the parent image is deleted

This results in a spurious ENOENT error, mainly when trying to open the
image (e.g. for "rbd info").  This race condition has always been there
but auto-deletion of trashed snapshots makes it much worse.

Retry ENOENT in V2_REFRESH_PARENT the same way as in V2_GET_SNAPSHOTS.

Fixes: https://tracker.ceph.com/issues/52810
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Ilya Dryomov 2022-09-04 17:52:51 +02:00
parent 8570194b13
commit bd885d75b2
3 changed files with 161 additions and 13 deletions

View File

@ -865,7 +865,14 @@ Context *RefreshRequest<I>::handle_v2_refresh_parent(int *result) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl;
if (*result < 0) {
if (*result == -ENOENT && m_enoent_retries++ < MAX_ENOENT_RETRIES) {
ldout(cct, 10) << "out-of-sync parent info detected, retrying" << dendl;
ceph_assert(m_refresh_parent != nullptr);
delete m_refresh_parent;
m_refresh_parent = nullptr;
send_v2_get_mutable_metadata();
return nullptr;
} else if (*result < 0) {
lderr(cct) << "failed to refresh parent image: " << cpp_strerror(*result)
<< dendl;
save_result(result);

View File

@ -80,9 +80,9 @@ private:
* * v v * |
* * * V2_GET_SNAPSHOTS (skip if no snaps) |
* (ENOENT) | |
* v |
* V2_REFRESH_PARENT (skip if no parent or |
* | refresh not needed) |
* * v |
* * * V2_REFRESH_PARENT (skip if no parent or |
* (ENOENT) | refresh not needed) |
* v |
* V2_INIT_EXCLUSIVE_LOCK (skip if lock |
* | active or disabled) |

View File

@ -20,7 +20,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <arpa/inet.h>
#include <list>
#include <queue>
#include <boost/scope_exit.hpp>
namespace librbd {
@ -69,26 +69,32 @@ struct GetMetadataRequest<MockRefreshImageCtx> {
template <>
struct RefreshParentRequest<MockRefreshImageCtx> {
static RefreshParentRequest* s_instance;
static std::queue<RefreshParentRequest*> s_instances;
static RefreshParentRequest* create(MockRefreshImageCtx &mock_image_ctx,
const ParentImageInfo &parent_md,
const MigrationInfo &migration_info,
Context *on_finish) {
ceph_assert(s_instance != nullptr);
s_instance->on_finish = on_finish;
return s_instance;
ceph_assert(!s_instances.empty());
auto instance = s_instances.front();
instance->on_finish = on_finish;
return instance;
}
static bool is_refresh_required(MockRefreshImageCtx &mock_image_ctx,
const ParentImageInfo& parent_md,
const MigrationInfo &migration_info) {
ceph_assert(s_instance != nullptr);
return s_instance->is_refresh_required();
ceph_assert(!s_instances.empty());
return s_instances.front()->is_refresh_required();
}
Context *on_finish = nullptr;
RefreshParentRequest() {
s_instance = this;
s_instances.push(this);
}
~RefreshParentRequest() {
ceph_assert(this == s_instances.front());
s_instances.pop();
}
MOCK_CONST_METHOD0(is_refresh_required, bool());
@ -98,7 +104,7 @@ struct RefreshParentRequest<MockRefreshImageCtx> {
};
GetMetadataRequest<MockRefreshImageCtx>* GetMetadataRequest<MockRefreshImageCtx>::s_instance = nullptr;
RefreshParentRequest<MockRefreshImageCtx>* RefreshParentRequest<MockRefreshImageCtx>::s_instance = nullptr;
std::queue<RefreshParentRequest<MockRefreshImageCtx>*> RefreshParentRequest<MockRefreshImageCtx>::s_instances;
} // namespace image
@ -909,6 +915,141 @@ TEST_F(TestMockImageRefreshRequest, SuccessChildDontOpenParent) {
ASSERT_EQ(0, ctx.wait());
}
TEST_F(TestMockImageRefreshRequest, SuccessChildBeingFlattened) {
REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
librbd::ImageCtx *ictx;
librbd::ImageCtx *ictx2 = nullptr;
std::string clone_name = get_temp_image_name();
ASSERT_EQ(0, open_image(m_image_name, &ictx));
ASSERT_EQ(0, snap_create(*ictx, "snap"));
ASSERT_EQ(0, snap_protect(*ictx, "snap"));
BOOST_SCOPE_EXIT_ALL((&)) {
if (ictx2 != nullptr) {
close_image(ictx2);
}
librbd::NoOpProgressContext no_op;
ASSERT_EQ(0, librbd::api::Image<>::remove(m_ioctx, clone_name, no_op));
ASSERT_EQ(0, ictx->operations->snap_unprotect(
cls::rbd::UserSnapshotNamespace(), "snap"));
};
int order = ictx->order;
ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap", m_ioctx,
clone_name.c_str(), ictx->features, &order, 0, 0));
ASSERT_EQ(0, open_image(clone_name, &ictx2));
MockRefreshImageCtx mock_image_ctx(*ictx2);
auto mock_refresh_parent_request = new MockRefreshParentRequest();
MockRefreshParentRequest mock_refresh_parent_request_ext;
MockExclusiveLock mock_exclusive_lock;
expect_op_work_queue(mock_image_ctx);
expect_test_features(mock_image_ctx);
mock_image_ctx.features &= ~RBD_FEATURE_OPERATIONS;
InSequence seq;
expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0);
expect_get_parent(mock_image_ctx, 0);
MockGetMetadataRequest mock_get_metadata_request;
expect_get_metadata(mock_image_ctx, mock_get_metadata_request,
mock_image_ctx.header_oid, {}, 0);
expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {},
0);
expect_apply_metadata(mock_image_ctx, 0);
expect_get_group(mock_image_ctx, 0);
expect_refresh_parent_is_required(*mock_refresh_parent_request, true);
expect_refresh_parent_send(mock_image_ctx, *mock_refresh_parent_request,
-ENOENT);
expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0);
expect_get_parent(mock_image_ctx, 0);
expect_get_metadata(mock_image_ctx, mock_get_metadata_request,
mock_image_ctx.header_oid, {}, 0);
expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {},
0);
expect_apply_metadata(mock_image_ctx, 0);
expect_get_group(mock_image_ctx, 0);
expect_refresh_parent_is_required(mock_refresh_parent_request_ext, false);
if (ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
expect_init_exclusive_lock(mock_image_ctx, mock_exclusive_lock, 0);
}
EXPECT_CALL(mock_image_ctx, rebuild_data_io_context());
C_SaferCond ctx;
auto req = new MockRefreshRequest(mock_image_ctx, false, false, &ctx);
req->send();
ASSERT_EQ(0, ctx.wait());
}
TEST_F(TestMockImageRefreshRequest, ChildEnoentRetriesLimit) {
REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
librbd::ImageCtx *ictx;
librbd::ImageCtx *ictx2 = nullptr;
std::string clone_name = get_temp_image_name();
ASSERT_EQ(0, open_image(m_image_name, &ictx));
ASSERT_EQ(0, snap_create(*ictx, "snap"));
ASSERT_EQ(0, snap_protect(*ictx, "snap"));
BOOST_SCOPE_EXIT_ALL((&)) {
if (ictx2 != nullptr) {
close_image(ictx2);
}
librbd::NoOpProgressContext no_op;
ASSERT_EQ(0, librbd::api::Image<>::remove(m_ioctx, clone_name, no_op));
ASSERT_EQ(0, ictx->operations->snap_unprotect(
cls::rbd::UserSnapshotNamespace(), "snap"));
};
int order = ictx->order;
ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap", m_ioctx,
clone_name.c_str(), ictx->features, &order, 0, 0));
ASSERT_EQ(0, open_image(clone_name, &ictx2));
MockRefreshImageCtx mock_image_ctx(*ictx2);
constexpr int num_tries = RefreshRequest<>::MAX_ENOENT_RETRIES + 1;
MockRefreshParentRequest* mock_refresh_parent_requests[num_tries];
for (auto& mock_refresh_parent_request : mock_refresh_parent_requests) {
mock_refresh_parent_request = new MockRefreshParentRequest();
}
MockGetMetadataRequest mock_get_metadata_request;
expect_op_work_queue(mock_image_ctx);
expect_test_features(mock_image_ctx);
mock_image_ctx.features &= ~RBD_FEATURE_OPERATIONS;
InSequence seq;
for (auto mock_refresh_parent_request : mock_refresh_parent_requests) {
expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0);
expect_get_parent(mock_image_ctx, 0);
expect_get_metadata(mock_image_ctx, mock_get_metadata_request,
mock_image_ctx.header_oid, {}, 0);
expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {},
0);
expect_apply_metadata(mock_image_ctx, 0);
expect_get_group(mock_image_ctx, 0);
expect_refresh_parent_is_required(*mock_refresh_parent_request, true);
expect_refresh_parent_send(mock_image_ctx, *mock_refresh_parent_request,
-ENOENT);
}
expect_refresh_parent_apply(*mock_refresh_parent_requests[num_tries - 1]);
EXPECT_CALL(mock_image_ctx, rebuild_data_io_context());
expect_refresh_parent_finalize(
mock_image_ctx, *mock_refresh_parent_requests[num_tries - 1], 0);
C_SaferCond ctx;
auto req = new MockRefreshRequest(mock_image_ctx, false, false, &ctx);
req->send();
ASSERT_EQ(-ENOENT, ctx.wait());
}
TEST_F(TestMockImageRefreshRequest, SuccessOpFeatures) {
REQUIRE_FORMAT_V2();