From 85e7075d5f021bd2d11024e6646d74a8a9f96e15 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 20 Mar 2022 12:10:52 +0100 Subject: [PATCH] librbd: make diff-iterate in fast-diff mode sort and merge reported extents Various users, the most notable example being the QEMU driver, assume that extents are reported in image offset order. Fixes: https://tracker.ceph.com/issues/53885 Signed-off-by: Ilya Dryomov --- src/librbd/api/DiffIterate.cc | 40 ++++++++++++------- src/test/librbd/test_librbd.cc | 71 ++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 15 deletions(-) diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc index 856a30a6b77..fa1df6df373 100644 --- a/src/librbd/api/DiffIterate.cc +++ b/src/librbd/api/DiffIterate.cc @@ -300,12 +300,15 @@ int DiffIterate::execute() { &m_image_ctx.layout, off, read_len, 0, object_extents, 0); - // get snap info for each object + // get diff info for each object and merge adjacent stripe units + // into an aggregate (this also sorts them) + io::SparseExtents aggregate_sparse_extents; for (auto& [object, extents] : object_extents) { - ldout(cct, 20) << "object " << object << dendl; - const uint64_t object_no = extents.front().objectno; uint8_t diff_state = object_diff_state[object_no]; + ldout(cct, 20) << "object " << object << ": diff_state=" + << (int)diff_state << dendl; + if (diff_state == object_map::DIFF_STATE_HOLE && from_snap_id == 0 && !parent_diff.empty()) { // no data in child object -- report parent diff instead @@ -316,29 +319,36 @@ int DiffIterate::execute() { o.intersection_of(parent_diff); ldout(cct, 20) << " reporting parent overlap " << o << dendl; for (auto e = o.begin(); e != o.end(); ++e) { - r = m_callback(e.get_start(), e.get_len(), true, - m_callback_arg); - if (r < 0) { - return r; - } + aggregate_sparse_extents.insert(e.get_start(), e.get_len(), + {io::SPARSE_EXTENT_STATE_DATA, + e.get_len()}); } } } } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED || diff_state == object_map::DIFF_STATE_DATA_UPDATED) { - bool updated = (diff_state == object_map::DIFF_STATE_DATA_UPDATED); + auto state = (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ? + io::SPARSE_EXTENT_STATE_ZEROED : io::SPARSE_EXTENT_STATE_DATA); for (auto& oe : extents) { for (auto& be : oe.buffer_extents) { - r = m_callback(off + be.first, be.second, updated, - m_callback_arg); - if (r < 0) { - return r; - } + aggregate_sparse_extents.insert(off + be.first, be.second, + {state, be.second}); } } } } - } else { + + for (const auto& se : aggregate_sparse_extents) { + ldout(cct, 20) << "off=" << se.get_off() << ", len=" << se.get_len() + << ", state=" << se.get_val().state << dendl; + r = m_callback(se.get_off(), se.get_len(), + se.get_val().state == io::SPARSE_EXTENT_STATE_DATA, + m_callback_arg); + if (r < 0) { + return r; + } + } + } else { auto diff_object = new C_DiffObject(m_image_ctx, diff_context, off, read_len); diff_object->send(); diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 245e1926762..b71e5b76b02 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -4787,6 +4787,77 @@ TYPED_TEST(DiffIterateTest, DiffIterateUnaligned) ioctx.close(); } +TYPED_TEST(DiffIterateTest, DiffIterateStriping) +{ + REQUIRE_FEATURE(RBD_FEATURE_STRIPINGV2); + + librados::IoCtx ioctx; + ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); + + bool old_format; + uint64_t features; + ASSERT_EQ(0, get_features(&old_format, &features)); + ASSERT_FALSE(old_format); + + { + librbd::RBD rbd; + librbd::Image image; + int order = 22; + std::string name = this->get_temp_image_name(); + ssize_t size = 24 << 20; + + ASSERT_EQ(0, rbd.create3(ioctx, name.c_str(), size, features, &order, + 1 << 20, 3)); + ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); + + ceph::bufferlist bl; + bl.append(std::string(size, '1')); + ASSERT_EQ(size, image.write(0, size, bl)); + + std::vector extents; + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 12 << 20, true, 0), extents[0]); + ASSERT_EQ(diff_extent(12 << 20, 12 << 20, true, 0), extents[1]); + extents.clear(); + + ASSERT_EQ(0, image.snap_create("one")); + ASSERT_EQ(size, image.discard(0, size)); + + ASSERT_EQ(0, image.diff_iterate2("one", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 12 << 20, false, 0), extents[0]); + ASSERT_EQ(diff_extent(12 << 20, 12 << 20, false, 0), extents[1]); + extents.clear(); + + ASSERT_EQ(1 << 20, image.write(0, 1 << 20, bl)); + ASSERT_EQ(2 << 20, image.write(2 << 20, 2 << 20, bl)); + ASSERT_EQ(2 << 20, image.write(5 << 20, 2 << 20, bl)); + ASSERT_EQ(2 << 20, image.write(8 << 20, 2 << 20, bl)); + ASSERT_EQ(13 << 20, image.write(11 << 20, 13 << 20, bl)); + + ASSERT_EQ(0, image.diff_iterate2("one", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(10u, extents.size()); + ASSERT_EQ(diff_extent(0, 1 << 20, true, 0), extents[0]); + ASSERT_EQ(diff_extent(1 << 20, 1 << 20, false, 0), extents[1]); + ASSERT_EQ(diff_extent(2 << 20, 2 << 20, true, 0), extents[2]); + ASSERT_EQ(diff_extent(4 << 20, 1 << 20, false, 0), extents[3]); + ASSERT_EQ(diff_extent(5 << 20, 2 << 20, true, 0), extents[4]); + ASSERT_EQ(diff_extent(7 << 20, 1 << 20, false, 0), extents[5]); + ASSERT_EQ(diff_extent(8 << 20, 2 << 20, true, 0), extents[6]); + ASSERT_EQ(diff_extent(10 << 20, 1 << 20, false, 0), extents[7]); + ASSERT_EQ(diff_extent(11 << 20, 1 << 20, true, 0), extents[8]); + ASSERT_EQ(diff_extent(12 << 20, 12 << 20, true, 0), extents[9]); + + ASSERT_PASSED(this->validate_object_map, image); + } + + ioctx.close(); +} + TEST_F(TestLibRBD, ZeroLengthWrite) { rados_ioctx_t ioctx;