mirror of
https://github.com/ceph/ceph
synced 2025-02-24 03:27:10 +00:00
Merge pull request #27268 from trociny/wip-38364
librbd: support EC data pool images sparsify Reviewed-by: Jason Dillaman <dillaman@redhat.com>
This commit is contained in:
commit
f42d3ffb31
@ -5,6 +5,7 @@
|
||||
#include "cls/rbd/cls_rbd_client.h"
|
||||
#include "common/dout.h"
|
||||
#include "common/errno.h"
|
||||
#include "include/err.h"
|
||||
#include "librbd/AsyncObjectThrottle.h"
|
||||
#include "librbd/ExclusiveLock.h"
|
||||
#include "librbd/ImageCtx.h"
|
||||
@ -19,6 +20,58 @@
|
||||
namespace librbd {
|
||||
namespace operation {
|
||||
|
||||
namespace {
|
||||
|
||||
bool may_be_trimmed(const std::map<uint64_t,uint64_t> &extent_map,
|
||||
const bufferlist &bl, size_t sparse_size,
|
||||
uint64_t *new_end_ptr) {
|
||||
if (extent_map.empty()) {
|
||||
*new_end_ptr = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t end = extent_map.rbegin()->first + extent_map.rbegin()->second;
|
||||
uint64_t new_end = end;
|
||||
uint64_t bl_off = bl.length();
|
||||
|
||||
for (auto it = extent_map.rbegin(); it != extent_map.rend(); it++) {
|
||||
auto off = it->first;
|
||||
auto len = it->second;
|
||||
|
||||
new_end = p2roundup<uint64_t>(off + len, sparse_size);
|
||||
|
||||
uint64_t extent_left = len;
|
||||
uint64_t sub_len = len % sparse_size;
|
||||
if (sub_len == 0) {
|
||||
sub_len = sparse_size;
|
||||
}
|
||||
while (extent_left > 0) {
|
||||
ceph_assert(bl_off >= sub_len);
|
||||
bl_off -= sub_len;
|
||||
bufferlist sub_bl;
|
||||
sub_bl.substr_of(bl, bl_off, sub_len);
|
||||
if (!sub_bl.is_zero()) {
|
||||
break;
|
||||
}
|
||||
new_end -= sparse_size;
|
||||
extent_left -= sub_len;
|
||||
sub_len = sparse_size;
|
||||
}
|
||||
if (extent_left > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (new_end < end) {
|
||||
*new_end_ptr = new_end;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
using util::create_context_callback;
|
||||
using util::create_rados_callback;
|
||||
|
||||
@ -35,21 +88,24 @@ public:
|
||||
*
|
||||
* <start>
|
||||
* |
|
||||
* v (object map disabled)
|
||||
* SPARSIFY -----------------------\
|
||||
* | |
|
||||
* | (object map enabled) |
|
||||
* v |
|
||||
* PRE UPDATE OBJECT MAP |
|
||||
* | |
|
||||
* v |
|
||||
* CHECK EXISTS |
|
||||
* | |
|
||||
* v |
|
||||
* POST UPDATE OBJECT MAP |
|
||||
* | |
|
||||
* v |
|
||||
* <finish> <----------------------/
|
||||
* v (not supported)
|
||||
* SPARSIFY * * * * * * * * * * * * > READ < * * * * * * * * * * (concurrent
|
||||
* | | * update is
|
||||
* | (object map disabled) | (can trim) * detected)
|
||||
* |------------------------\ V *
|
||||
* | | PRE UPDATE OBJECT MAP *
|
||||
* | (object map enabled) | | (if needed) *
|
||||
* v | V *
|
||||
* PRE UPDATE OBJECT MAP | TRIM * * * * * * * * * * *
|
||||
* | | |
|
||||
* v | V
|
||||
* CHECK EXISTS | POST UPDATE OBJECT MAP
|
||||
* | | | (if needed)
|
||||
* v | |
|
||||
* POST UPDATE OBJECT MAP | |
|
||||
* | | |
|
||||
* v | |
|
||||
* <finish> <------------------/<-------/
|
||||
*
|
||||
* @endverbatim
|
||||
*
|
||||
@ -112,12 +168,20 @@ public:
|
||||
void handle_sparsify(int r) {
|
||||
ldout(m_cct, 20) << "r=" << r << dendl;
|
||||
|
||||
if (r < 0 && r != -ENOENT) {
|
||||
lderr(m_cct) << "failed to sparsify: " << cpp_strerror(r) << dendl;
|
||||
if (r == -EOPNOTSUPP) {
|
||||
m_trying_trim = true;
|
||||
send_read();
|
||||
return;
|
||||
}
|
||||
|
||||
if (r == -ENOENT) {
|
||||
this->complete(0);
|
||||
finish_op(0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (r < 0) {
|
||||
lderr(m_cct) << "failed to sparsify: " << cpp_strerror(r) << dendl;
|
||||
finish_op(r);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -125,34 +189,41 @@ public:
|
||||
}
|
||||
|
||||
void send_pre_update_object_map() {
|
||||
I *image_ctx = &this->m_image_ctx;
|
||||
I &image_ctx = this->m_image_ctx;
|
||||
|
||||
if (!m_remove_empty || !image_ctx->test_features(RBD_FEATURE_OBJECT_MAP)) {
|
||||
this->complete(0);
|
||||
if (m_trying_trim) {
|
||||
if (!m_remove_empty || m_new_end != 0 ||
|
||||
!image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
|
||||
send_trim();
|
||||
return;
|
||||
}
|
||||
} else if (!m_remove_empty ||
|
||||
!image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
|
||||
finish_op(0);
|
||||
return;
|
||||
}
|
||||
|
||||
ldout(m_cct, 20) << dendl;
|
||||
|
||||
image_ctx->owner_lock.get_read();
|
||||
image_ctx->snap_lock.get_read();
|
||||
if (image_ctx->object_map == nullptr) {
|
||||
image_ctx.owner_lock.get_read();
|
||||
image_ctx.snap_lock.get_read();
|
||||
if (image_ctx.object_map == nullptr) {
|
||||
// possible that exclusive lock was lost in background
|
||||
lderr(m_cct) << "object map is not initialized" << dendl;
|
||||
|
||||
image_ctx->snap_lock.put_read();
|
||||
image_ctx->owner_lock.put_read();
|
||||
this->complete(-EINVAL);
|
||||
image_ctx.snap_lock.put_read();
|
||||
image_ctx.owner_lock.put_read();
|
||||
finish_op(-EINVAL);
|
||||
return;
|
||||
}
|
||||
|
||||
int r;
|
||||
m_finish_op_ctx = image_ctx->exclusive_lock->start_op(&r);
|
||||
m_finish_op_ctx = image_ctx.exclusive_lock->start_op(&r);
|
||||
if (m_finish_op_ctx == nullptr) {
|
||||
lderr(m_cct) << "lost exclusive lock" << dendl;
|
||||
image_ctx->snap_lock.put_read();
|
||||
image_ctx->owner_lock.put_read();
|
||||
this->complete(r);
|
||||
image_ctx.snap_lock.put_read();
|
||||
image_ctx.owner_lock.put_read();
|
||||
finish_op(r);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -160,17 +231,17 @@ public:
|
||||
C_SparsifyObject<I>,
|
||||
&C_SparsifyObject<I>::handle_pre_update_object_map>(this);
|
||||
|
||||
image_ctx->object_map_lock.get_write();
|
||||
bool sent = image_ctx->object_map->template aio_update<
|
||||
image_ctx.object_map_lock.get_write();
|
||||
bool sent = image_ctx.object_map->template aio_update<
|
||||
Context, &Context::complete>(CEPH_NOSNAP, m_object_no, OBJECT_PENDING,
|
||||
OBJECT_EXISTS, {}, false, ctx);
|
||||
|
||||
// NOTE: state machine might complete before we reach here
|
||||
image_ctx->object_map_lock.put_write();
|
||||
image_ctx->snap_lock.put_read();
|
||||
image_ctx->owner_lock.put_read();
|
||||
image_ctx.object_map_lock.put_write();
|
||||
image_ctx.snap_lock.put_read();
|
||||
image_ctx.owner_lock.put_read();
|
||||
if (!sent) {
|
||||
ctx->complete(0);
|
||||
finish_op(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -184,7 +255,11 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
send_check_exists();
|
||||
if (m_trying_trim) {
|
||||
send_trim();
|
||||
} else {
|
||||
send_check_exists();
|
||||
}
|
||||
}
|
||||
|
||||
void send_check_exists() {
|
||||
@ -194,10 +269,10 @@ public:
|
||||
|
||||
librados::ObjectReadOperation op;
|
||||
op.stat(NULL, NULL, NULL);
|
||||
m_out_bl.clear();
|
||||
m_bl.clear();
|
||||
auto comp = create_rados_callback<
|
||||
C_SparsifyObject, &C_SparsifyObject::handle_check_exists>(this);
|
||||
int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_out_bl);
|
||||
int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
|
||||
ceph_assert(r == 0);
|
||||
comp->release();
|
||||
}
|
||||
@ -217,6 +292,8 @@ public:
|
||||
void send_post_update_object_map(bool exists) {
|
||||
I &image_ctx = this->m_image_ctx;
|
||||
|
||||
ldout(m_cct, 20) << dendl;
|
||||
|
||||
auto ctx = create_context_callback<
|
||||
C_SparsifyObject<I>,
|
||||
&C_SparsifyObject<I>::handle_post_update_object_map>(this);
|
||||
@ -253,10 +330,100 @@ public:
|
||||
finish_op(0);
|
||||
}
|
||||
|
||||
void send_read() {
|
||||
I &image_ctx = this->m_image_ctx;
|
||||
|
||||
ldout(m_cct, 20) << dendl;
|
||||
|
||||
librados::ObjectReadOperation op;
|
||||
m_bl.clear();
|
||||
op.sparse_read(0, image_ctx.layout.object_size, &m_extent_map, &m_bl,
|
||||
nullptr);
|
||||
auto comp = create_rados_callback<
|
||||
C_SparsifyObject, &C_SparsifyObject::handle_read>(this);
|
||||
int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
|
||||
ceph_assert(r == 0);
|
||||
comp->release();
|
||||
}
|
||||
|
||||
void handle_read(int r) {
|
||||
ldout(m_cct, 20) << "r=" << r << dendl;
|
||||
|
||||
if (r < 0) {
|
||||
if (r == -ENOENT) {
|
||||
r = 0;
|
||||
} else {
|
||||
lderr(m_cct) << "failed to read object: " << cpp_strerror(r) << dendl;
|
||||
}
|
||||
finish_op(r);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!may_be_trimmed(m_extent_map, m_bl, m_sparse_size, &m_new_end)) {
|
||||
finish_op(0);
|
||||
return;
|
||||
}
|
||||
|
||||
send_pre_update_object_map();
|
||||
}
|
||||
|
||||
void send_trim() {
|
||||
I &image_ctx = this->m_image_ctx;
|
||||
|
||||
ldout(m_cct, 20) << dendl;
|
||||
|
||||
ceph_assert(m_new_end < image_ctx.layout.object_size);
|
||||
|
||||
librados::ObjectWriteOperation op;
|
||||
m_bl.clear();
|
||||
m_bl.append_zero(image_ctx.layout.object_size - m_new_end);
|
||||
op.cmpext(m_new_end, m_bl, nullptr);
|
||||
if (m_new_end == 0 && m_remove_empty) {
|
||||
op.remove();
|
||||
} else {
|
||||
op.truncate(m_new_end);
|
||||
}
|
||||
|
||||
auto comp = create_rados_callback<
|
||||
C_SparsifyObject, &C_SparsifyObject::handle_trim>(this);
|
||||
int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op);
|
||||
ceph_assert(r == 0);
|
||||
comp->release();
|
||||
}
|
||||
|
||||
void handle_trim(int r) {
|
||||
I &image_ctx = this->m_image_ctx;
|
||||
|
||||
ldout(m_cct, 20) << "r=" << r << dendl;
|
||||
|
||||
if (r <= -MAX_ERRNO) {
|
||||
m_finish_op_ctx->complete(0);
|
||||
m_finish_op_ctx = nullptr;
|
||||
send_read();
|
||||
return;
|
||||
}
|
||||
|
||||
if (r < 0 && r != -ENOENT) {
|
||||
lderr(m_cct) << "failed to trim: " << cpp_strerror(r) << dendl;
|
||||
finish_op(r);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!m_remove_empty || m_new_end != 0 ||
|
||||
!image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
|
||||
finish_op(0);
|
||||
return;
|
||||
}
|
||||
|
||||
send_post_update_object_map(false);
|
||||
}
|
||||
|
||||
void finish_op(int r) {
|
||||
ldout(m_cct, 20) << "r=" << r << dendl;
|
||||
|
||||
m_finish_op_ctx->complete(0);
|
||||
if (m_finish_op_ctx != nullptr) {
|
||||
m_finish_op_ctx->complete(0);
|
||||
}
|
||||
this->complete(r);
|
||||
}
|
||||
|
||||
@ -267,7 +434,10 @@ private:
|
||||
std::string m_oid;
|
||||
|
||||
bool m_remove_empty = false;
|
||||
bufferlist m_out_bl;
|
||||
bool m_trying_trim = false;
|
||||
bufferlist m_bl;
|
||||
std::map<uint64_t,uint64_t> m_extent_map;
|
||||
uint64_t m_new_end = 0;
|
||||
Context *m_finish_op_ctx = nullptr;
|
||||
};
|
||||
|
||||
|
@ -1365,11 +1365,14 @@ TEST_F(TestInternal, Sparsify) {
|
||||
librbd::ImageCtx *ictx;
|
||||
ASSERT_EQ(0, open_image(m_image_name, &ictx));
|
||||
|
||||
REQUIRE(is_sparsify_supported(ictx->data_ctx, ictx->get_object_name(10)));
|
||||
|
||||
bool sparsify_supported = is_sparsify_supported(ictx->data_ctx,
|
||||
ictx->get_object_name(10));
|
||||
bool sparse_read_supported = is_sparse_read_supported(
|
||||
ictx->data_ctx, ictx->get_object_name(10));
|
||||
|
||||
std::cout << "sparsify_supported=" << sparsify_supported << std::endl;
|
||||
std::cout << "sparse_read_supported=" << sparse_read_supported << std::endl;
|
||||
|
||||
librbd::NoOpProgressContext no_op;
|
||||
ASSERT_EQ(0, ictx->operations->resize((1 << ictx->order) * 20, true, no_op));
|
||||
|
||||
@ -1379,13 +1382,24 @@ TEST_F(TestInternal, Sparsify) {
|
||||
ASSERT_EQ((ssize_t)bl.length(),
|
||||
ictx->io_work_queue->write(0, bl.length(), bufferlist{bl}, 0));
|
||||
|
||||
ASSERT_EQ((ssize_t)bl.length(),
|
||||
ictx->io_work_queue->write((1 << ictx->order) * 1 + 512,
|
||||
bl.length(), bufferlist{bl}, 0));
|
||||
|
||||
bl.append(std::string(4096, '1'));
|
||||
bl.append(std::string(4096, '\0'));
|
||||
bl.append(std::string(4096, '2'));
|
||||
bl.append(std::string(4096, '\0'));
|
||||
bl.append(std::string(4096 - 1, '\0'));
|
||||
ASSERT_EQ((ssize_t)bl.length(),
|
||||
ictx->io_work_queue->write((1 << ictx->order) * 10, bl.length(),
|
||||
bufferlist{bl}, 0));
|
||||
|
||||
bufferlist bl2;
|
||||
bl2.append(std::string(4096 - 1, '\0'));
|
||||
ASSERT_EQ((ssize_t)bl2.length(),
|
||||
ictx->io_work_queue->write((1 << ictx->order) * 10 + 4096 * 10,
|
||||
bl2.length(), bufferlist{bl2}, 0));
|
||||
|
||||
ASSERT_EQ(0, ictx->io_work_queue->flush());
|
||||
|
||||
ASSERT_EQ(0, ictx->operations->sparsify(4096, no_op));
|
||||
@ -1404,21 +1418,30 @@ TEST_F(TestInternal, Sparsify) {
|
||||
uint64_t size;
|
||||
ASSERT_EQ(-ENOENT, ictx->data_ctx.stat(oid, &size, NULL));
|
||||
|
||||
if (!sparse_read_supported) {
|
||||
return;
|
||||
}
|
||||
oid = ictx->get_object_name(1);
|
||||
ASSERT_EQ(-ENOENT, ictx->data_ctx.stat(oid, &size, NULL));
|
||||
|
||||
oid = ictx->get_object_name(10);
|
||||
std::map<uint64_t, uint64_t> m;
|
||||
read_bl.clear();
|
||||
ASSERT_EQ(2, ictx->data_ctx.sparse_read(oid, m, read_bl, bl.length(), 0));
|
||||
std::map<uint64_t, uint64_t> expected_m =
|
||||
{{4096 * 1, 4096}, {4096 * 3, 4096}};
|
||||
ASSERT_EQ(m, expected_m);
|
||||
std::map<uint64_t, uint64_t> expected_m;
|
||||
auto read_len = bl.length();
|
||||
bl.clear();
|
||||
bl.append(std::string(4096, '1'));
|
||||
bl.append(std::string(4096, '2'));
|
||||
ASSERT_TRUE(bl.contents_equal(read_bl));
|
||||
if (sparsify_supported && sparse_read_supported) {
|
||||
expected_m = {{4096 * 1, 4096}, {4096 * 3, 4096}};
|
||||
bl.append(std::string(4096, '1'));
|
||||
bl.append(std::string(4096, '2'));
|
||||
} else {
|
||||
expected_m = {{0, 4096 * 4}};
|
||||
bl.append(std::string(4096, '\0'));
|
||||
bl.append(std::string(4096, '1'));
|
||||
bl.append(std::string(4096, '\0'));
|
||||
bl.append(std::string(4096, '2'));
|
||||
}
|
||||
read_bl.clear();
|
||||
EXPECT_EQ(static_cast<int>(expected_m.size()),
|
||||
ictx->data_ctx.sparse_read(oid, m, read_bl, read_len, 0));
|
||||
EXPECT_EQ(m, expected_m);
|
||||
EXPECT_TRUE(bl.contents_equal(read_bl));
|
||||
}
|
||||
|
||||
|
||||
@ -1428,10 +1451,9 @@ TEST_F(TestInternal, SparsifyClone) {
|
||||
librbd::ImageCtx *ictx;
|
||||
ASSERT_EQ(0, open_image(m_image_name, &ictx));
|
||||
|
||||
REQUIRE(is_sparsify_supported(ictx->data_ctx, ictx->get_object_name(10)));
|
||||
|
||||
bool sparse_read_supported = is_sparse_read_supported(
|
||||
ictx->data_ctx, ictx->get_object_name(10));
|
||||
bool sparsify_supported = is_sparsify_supported(ictx->data_ctx,
|
||||
ictx->get_object_name(10));
|
||||
std::cout << "sparsify_supported=" << sparsify_supported << std::endl;
|
||||
|
||||
librbd::NoOpProgressContext no_op;
|
||||
ASSERT_EQ(0, ictx->operations->resize((1 << ictx->order) * 10, true, no_op));
|
||||
@ -1484,20 +1506,4 @@ TEST_F(TestInternal, SparsifyClone) {
|
||||
uint64_t size;
|
||||
ASSERT_EQ(0, ictx->data_ctx.stat(oid, &size, NULL));
|
||||
ASSERT_EQ(0, ictx->data_ctx.read(oid, read_bl, 4096, 0));
|
||||
|
||||
if (!sparse_read_supported) {
|
||||
return;
|
||||
}
|
||||
|
||||
oid = ictx->get_object_name(10);
|
||||
std::map<uint64_t, uint64_t> m;
|
||||
read_bl.clear();
|
||||
ASSERT_EQ(2, ictx->data_ctx.sparse_read(oid, m, read_bl, bl.length(), 0));
|
||||
std::map<uint64_t, uint64_t> expected_m =
|
||||
{{4096 * 1, 4096}, {4096 * 3, 4096}};
|
||||
ASSERT_EQ(m, expected_m);
|
||||
bl.clear();
|
||||
bl.append(std::string(4096, '1'));
|
||||
bl.append(std::string(4096, '2'));
|
||||
ASSERT_TRUE(bl.contents_equal(read_bl));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user