Merge pull request #48301 from PepperJo/cmp_write_C_vec

librbd: add compare-and-write vector C API

Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Ilya Dryomov 2022-10-06 14:07:36 +02:00 committed by GitHub
commit 06bb82c10c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 227 additions and 0 deletions

View File

@ -55,6 +55,9 @@
* RBD: compare-and-write operation is no longer limited to 512-byte sectors.
Assuming proper alignment, it now allows operating on stripe units (4M by
default).
* RBD: New `rbd_aio_compare_and_writev` API method to support scatter/gather
on both compare and write buffers. This complements existing `rbd_aio_readv`
and `rbd_aio_writev` methods.
* The 'AT_NO_ATTR_SYNC' macro is deprecated, please use the standard 'AT_STATX_DONT_SYNC'
macro. The 'AT_NO_ATTR_SYNC' macro will be removed in the future.
* Trimming of PGLog dups is now controlled by the size instead of the version.

View File

@ -42,6 +42,7 @@ extern "C" {
#define LIBRBD_SUPPORTS_AIO_FLUSH 1
#define LIBRBD_SUPPORTS_AIO_OPEN 1
#define LIBRBD_SUPPORTS_COMPARE_AND_WRITE 1
/* rbd_aio_compare_and_writev() (iovec-based compare-and-write) is available */
#define LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC 1
#define LIBRBD_SUPPORTS_LOCKING 1
#define LIBRBD_SUPPORTS_INVALIDATE 1
#define LIBRBD_SUPPORTS_IOVEC 1
@ -1195,6 +1196,15 @@ CEPH_RBD_API ssize_t rbd_aio_compare_and_write(rbd_image_t image,
rbd_completion_t c,
uint64_t *mismatch_off,
int op_flags);
/**
 * Asynchronous compare-and-write using scatter/gather buffers.
 *
 * The bytes described by cmp_iov are the compare data and the bytes
 * described by iov are the write data for the image range starting at off.
 * The total length of cmp_iov must equal the total length of iov.
 *
 * @param image the image to operate on
 * @param off offset in the image at which to compare and write
 * @param cmp_iov array of buffers holding the compare data
 * @param cmp_iovcnt number of entries in cmp_iov
 * @param iov array of buffers holding the data to write
 * @param iovcnt number of entries in iov
 * @param c completion through which the result of the operation is reported;
 *          on a compare mismatch its return value is -EILSEQ and nothing is
 *          written
 * @param mismatch_off set on a compare mismatch to the offset of the first
 *                     mismatching byte, counted from the start of the
 *                     compare data
 * @param op_flags op flags forwarded to the compare-and-write request
 * @returns 0 once the request has been submitted, -EINVAL if the total
 *          lengths of cmp_iov and iov differ, or the error reported while
 *          computing either iovec's total length
 */
CEPH_RBD_API ssize_t rbd_aio_compare_and_writev(rbd_image_t image,
uint64_t off,
const struct iovec *cmp_iov,
int cmp_iovcnt,
const struct iovec *iov,
int iovcnt,
rbd_completion_t c,
uint64_t *mismatch_off,
int op_flags);
CEPH_RBD_API int rbd_aio_create_completion(void *cb_arg,
rbd_callback_t complete_cb,

View File

@ -6352,6 +6352,52 @@ extern "C" ssize_t rbd_aio_compare_and_write(rbd_image_t image, uint64_t off,
return 0;
}
// C API entry point: asynchronous compare-and-write where both the compare
// data and the write data are supplied as iovec arrays.
//
// Returns 0 once the request has been handed to the I/O layer; the outcome of
// the operation itself is delivered through the completion `c`. Returns the
// error from get_iovec_length() if either iovec is rejected, or -EINVAL when
// the two iovecs do not describe the same number of bytes.
extern "C" ssize_t rbd_aio_compare_and_writev(rbd_image_t image,
                                              uint64_t off,
                                              const struct iovec *cmp_iov,
                                              int cmp_iovcnt,
                                              const struct iovec *iov,
                                              int iovcnt,
                                              rbd_completion_t c,
                                              uint64_t *mismatch_off,
                                              int op_flags)
{
  auto *ictx = (librbd::ImageCtx *)image;
  auto *comp = (librbd::RBD::AioCompletion *)c;

  // The compare length is computed before the enter tracepoint fires because
  // the tracepoint records it.
  size_t cmp_total;
  int rc = get_iovec_length(cmp_iov, cmp_iovcnt, cmp_total);

  tracepoint(librbd, aio_compare_and_write_enter, ictx, ictx->name.c_str(),
             ictx->snap_name.c_str(), ictx->read_only, off, cmp_total, NULL,
             NULL, comp->pc, op_flags);

  size_t write_total;
  if (rc == 0) {
    rc = get_iovec_length(iov, iovcnt, write_total);
  }

  // Both iovecs must cover exactly the same number of bytes.
  if (rc == 0 && cmp_total != write_total) {
    rc = -EINVAL;
  }

  if (rc == 0) {
    auto aio_completion = get_aio_completion(comp);
    auto cmp_bl = iovec_to_bufferlist(ictx, cmp_iov, cmp_iovcnt,
                                      aio_completion);
    auto write_bl = iovec_to_bufferlist(ictx, iov, iovcnt, aio_completion);
    librbd::api::Io<>::aio_compare_and_write(*ictx, aio_completion, off,
                                             cmp_total, std::move(cmp_bl),
                                             std::move(write_bl),
                                             mismatch_off, op_flags, false);
  }

  // Single exit: rc is 0 on successful submission, negative otherwise.
  tracepoint(librbd, aio_compare_and_write_exit, rc);
  return rc;
}
extern "C" int rbd_invalidate_cache(rbd_image_t image)
{
librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;

View File

@ -3099,6 +3099,174 @@ TEST_F(TestLibRBD, TestAioCompareAndWriteStripeUnitSuccess)
rados_ioctx_destroy(ioctx);
}
// rbd_aio_compare_and_writev() must return -EINVAL when the compare iovec and
// the write iovec describe different total lengths, and the image contents
// must stay as they were.
TEST_F(TestLibRBD, TestAioCompareAndWriteVIovecLenDiffers)
{
  rados_ioctx_t io_ctx;
  rados_ioctx_create(_cluster, m_pool_name.c_str(), &io_ctx);

  int image_order = 0;
  std::string image_name = get_temp_image_name();
  uint64_t image_size = 20 << 20; /* 20MiB */
  ASSERT_EQ(0, create_image(io_ctx, image_name.c_str(), image_size,
                            &image_order));

  rbd_image_t img;
  ASSERT_EQ(0, rbd_open(io_ctx, image_name.c_str(), &img, NULL));

  // seed the image with known data
  off_t io_off = 512;
  std::string seed_data("This is a test");
  size_t seed_len = seed_data.length();
  ASSERT_EQ(seed_len, rbd_write(img, io_off, seed_len, seed_data.data()));

  // write payload, split across three segments
  std::string new_data("Write this !!!");
  struct iovec new_iovs[] = {
    {.iov_base = &new_data[0], .iov_len = 6},
    {.iov_base = &new_data[6], .iov_len = 5},
    {.iov_base = &new_data[11], .iov_len = 3}
  };

  // only the first segment is passed as compare data, so the compare length
  // is shorter than the write length and the call has to be rejected
  rbd_completion_t completion;
  rbd_aio_create_completion(NULL, NULL, &completion);
  uint64_t mismatch_at = 0;
  int rc = rbd_aio_compare_and_writev(img, io_off,
                                      new_iovs /* cmp_iovs */, 1,
                                      new_iovs, std::size(new_iovs),
                                      completion, &mismatch_at, 0);
  ASSERT_EQ(-EINVAL, rc);
  ASSERT_EQ(0U, mismatch_at);
  rbd_aio_release(completion);

  // the seeded data must still be in place
  std::string readback(seed_data.length(), '1');
  ssize_t nread = rbd_read(img, io_off, readback.length(), readback.data());
  ASSERT_EQ(readback.length(), nread);
  ASSERT_EQ(seed_data, readback);

  ASSERT_PASSED(validate_object_map, img);
  ASSERT_EQ(0, rbd_close(img));
  rados_ioctx_destroy(io_ctx);
}
// When the compare data does not match what is stored in the image, the
// request is accepted but completes with -EILSEQ, reports the offset of the
// first differing byte, and writes nothing.
TEST_F(TestLibRBD, TestAioCompareAndWriteVMismatch)
{
  rados_ioctx_t io_ctx;
  rados_ioctx_create(_cluster, m_pool_name.c_str(), &io_ctx);

  int image_order = 0;
  std::string image_name = get_temp_image_name();
  uint64_t image_size = 20 << 20; /* 20MiB */
  ASSERT_EQ(0, create_image(io_ctx, image_name.c_str(), image_size,
                            &image_order));

  rbd_image_t img;
  ASSERT_EQ(0, rbd_open(io_ctx, image_name.c_str(), &img, NULL));

  // seed the image with known data
  off_t io_off = 512;
  std::string seed_data("This is a test");
  int seed_len = seed_data.length();
  ASSERT_EQ(seed_len, rbd_write(img, io_off, seed_len, seed_data.data()));

  // write payload, split across three segments
  std::string new_data("Write this !!!");
  struct iovec new_iovs[] = {
    {.iov_base = &new_data[0], .iov_len = 6},
    {.iov_base = &new_data[6], .iov_len = 5},
    {.iov_base = &new_data[11], .iov_len = 3}
  };

  // compare payload of the same length that diverges from the seeded data
  // starting at byte 5
  std::string wrong_data("This will fail");
  struct iovec wrong_iovs[] = {
    {.iov_base = &wrong_data[0], .iov_len = 5},
    {.iov_base = &wrong_data[5], .iov_len = 5},
    {.iov_base = &wrong_data[10], .iov_len = 4}
  };

  // submission succeeds; the mismatch is reported through the completion
  rbd_completion_t completion;
  rbd_aio_create_completion(NULL, NULL, &completion);
  uint64_t mismatch_at = 0;
  int rc = rbd_aio_compare_and_writev(img, io_off,
                                      wrong_iovs /* cmp_iovs */,
                                      std::size(wrong_iovs),
                                      new_iovs, std::size(new_iovs),
                                      completion, &mismatch_at, 0);
  ASSERT_EQ(0, rc);
  ASSERT_EQ(0, rbd_aio_wait_for_complete(completion));
  ASSERT_EQ(-EILSEQ, rbd_aio_get_return_value(completion));
  ASSERT_EQ(5U, mismatch_at);
  rbd_aio_release(completion);

  // the seeded data must still be in place
  std::string readback(seed_data.length(), '1');
  ssize_t nread = rbd_read(img, io_off, readback.length(), readback.data());
  ASSERT_EQ(readback.length(), nread);
  ASSERT_EQ(seed_data, readback);

  ASSERT_PASSED(validate_object_map, img);
  ASSERT_EQ(0, rbd_close(img));
  rados_ioctx_destroy(io_ctx);
}
// When the compare data matches what is stored in the image, the request
// completes with 0, leaves mismatch_off untouched, and the write data
// replaces the previous contents.
TEST_F(TestLibRBD, TestAioCompareAndWriteVSuccess)
{
  rados_ioctx_t io_ctx;
  rados_ioctx_create(_cluster, m_pool_name.c_str(), &io_ctx);

  int image_order = 0;
  std::string image_name = get_temp_image_name();
  uint64_t image_size = 20 << 20; /* 20MiB */
  ASSERT_EQ(0, create_image(io_ctx, image_name.c_str(), image_size,
                            &image_order));

  rbd_image_t img;
  ASSERT_EQ(0, rbd_open(io_ctx, image_name.c_str(), &img, NULL));

  // seed the image with known data
  off_t io_off = 512;
  std::string seed_data("This is a test");
  size_t seed_len = seed_data.length();
  ASSERT_EQ(seed_len, rbd_write(img, io_off, seed_len, seed_data.data()));

  // compare payload: the seeded data itself, split across four segments
  struct iovec seed_iovs[] = {
    {.iov_base = &seed_data[0], .iov_len = 5},
    {.iov_base = &seed_data[5], .iov_len = 3},
    {.iov_base = &seed_data[8], .iov_len = 2},
    {.iov_base = &seed_data[10], .iov_len = 4}
  };

  // write payload, split across three segments
  std::string new_data("Write this !!!");
  struct iovec new_iovs[] = {
    {.iov_base = &new_data[0], .iov_len = 6},
    {.iov_base = &new_data[6], .iov_len = 5},
    {.iov_base = &new_data[11], .iov_len = 3}
  };

  // the compare data equals the image contents => the write must go through
  rbd_completion_t completion;
  rbd_aio_create_completion(NULL, NULL, &completion);
  uint64_t mismatch_at = 0;
  int rc = rbd_aio_compare_and_writev(img, io_off,
                                      seed_iovs, std::size(seed_iovs),
                                      new_iovs, std::size(new_iovs),
                                      completion, &mismatch_at, 0);
  ASSERT_EQ(0, rc);
  ASSERT_EQ(0, rbd_aio_wait_for_complete(completion));
  ASSERT_EQ(0, rbd_aio_get_return_value(completion));
  ASSERT_EQ(0U, mismatch_at);
  rbd_aio_release(completion);

  // the image must now hold the write payload
  std::string readback(seed_data.length(), '1');
  ssize_t nread = rbd_read(img, io_off, readback.length(), readback.data());
  ASSERT_EQ(readback.length(), nread);
  ASSERT_EQ(new_data, readback);

  ASSERT_PASSED(validate_object_map, img);
  ASSERT_EQ(0, rbd_close(img));
  rados_ioctx_destroy(io_ctx);
}
TEST_F(TestLibRBD, TestScatterGatherIO)
{
rados_ioctx_t ioctx;