mirror of
https://github.com/ceph/ceph
synced 2025-01-04 02:02:36 +00:00
librbd: keep access/modified timestamp updates out of IO path
If the cache is enabled, it was possible for the timestamp updates to cause race conditions with librbd clients (e.g. rbd export) that had data corrupting effects since callbacks were expected to be serialized. Fixes: http://tracker.ceph.com/issues/37745 Signed-off-by: Jason Dillaman <dillaman@redhat.com>
This commit is contained in:
parent
d0af9eaa73
commit
5f3cb951c5
@ -9,6 +9,7 @@
|
|||||||
#include "librbd/Utils.h"
|
#include "librbd/Utils.h"
|
||||||
#include "librbd/cache/ImageCache.h"
|
#include "librbd/cache/ImageCache.h"
|
||||||
#include "librbd/io/AioCompletion.h"
|
#include "librbd/io/AioCompletion.h"
|
||||||
|
#include "librbd/io/AsyncOperation.h"
|
||||||
#include "librbd/io/ObjectDispatchInterface.h"
|
#include "librbd/io/ObjectDispatchInterface.h"
|
||||||
#include "librbd/io/ObjectDispatchSpec.h"
|
#include "librbd/io/ObjectDispatchSpec.h"
|
||||||
#include "librbd/io/ObjectDispatcher.h"
|
#include "librbd/io/ObjectDispatcher.h"
|
||||||
@ -18,6 +19,7 @@
|
|||||||
#include "common/perf_counters.h"
|
#include "common/perf_counters.h"
|
||||||
#include "common/WorkQueue.h"
|
#include "common/WorkQueue.h"
|
||||||
#include "osdc/Striper.h"
|
#include "osdc/Striper.h"
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
#define dout_subsys ceph_subsys_rbd
|
#define dout_subsys ceph_subsys_rbd
|
||||||
#undef dout_prefix
|
#undef dout_prefix
|
||||||
@ -34,37 +36,41 @@ namespace {
|
|||||||
template <typename I>
|
template <typename I>
|
||||||
struct C_UpdateTimestamp : public Context {
|
struct C_UpdateTimestamp : public Context {
|
||||||
public:
|
public:
|
||||||
I* m_image_ctx;
|
I& m_image_ctx;
|
||||||
AioCompletion* m_aio_comp;
|
bool m_modify; // if modify set to 'true', modify timestamp is updated,
|
||||||
bool modify; //if modify set to 'true', modify timestamp is updated, access timestamp otherwise
|
// access timestamp otherwise
|
||||||
|
AsyncOperation m_async_op;
|
||||||
|
|
||||||
C_UpdateTimestamp(I* ictx, AioCompletion *c, bool m)
|
C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
|
||||||
: m_image_ctx(ictx), m_aio_comp(c), modify(m) {
|
m_async_op.start_op(*get_image_ctx(&m_image_ctx));
|
||||||
m_aio_comp->add_request();
|
}
|
||||||
|
~C_UpdateTimestamp() override {
|
||||||
|
m_async_op.finish_op();
|
||||||
}
|
}
|
||||||
|
|
||||||
void send() {
|
void send() {
|
||||||
librados::ObjectWriteOperation op;
|
librados::ObjectWriteOperation op;
|
||||||
if(modify)
|
if (m_modify) {
|
||||||
cls_client::set_modify_timestamp(&op);
|
cls_client::set_modify_timestamp(&op);
|
||||||
else
|
} else {
|
||||||
cls_client::set_access_timestamp(&op);
|
cls_client::set_access_timestamp(&op);
|
||||||
|
}
|
||||||
|
|
||||||
librados::AioCompletion *comp = librbd::util::create_rados_callback(this);
|
auto comp = librbd::util::create_rados_callback(this);
|
||||||
int r = m_image_ctx->md_ctx.aio_operate(m_image_ctx->header_oid, comp, &op);
|
int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
|
||||||
ceph_assert(r == 0);
|
ceph_assert(r == 0);
|
||||||
comp->release();
|
comp->release();
|
||||||
}
|
}
|
||||||
|
|
||||||
void finish(int r) override {
|
void finish(int r) override {
|
||||||
// ignore errors updating timestamp
|
// ignore errors updating timestamp
|
||||||
m_aio_comp->complete_request(0);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
|
bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
|
||||||
uint64_t interval) {
|
uint64_t interval) {
|
||||||
return (interval && (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
|
return (interval &&
|
||||||
|
(static_cast<uint64_t>(now.sec()) >= interval + timestamp));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
@ -151,6 +157,7 @@ void ImageRequest<I>::send() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) {
|
if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) {
|
||||||
|
update_timestamp();
|
||||||
send_request();
|
send_request();
|
||||||
} else {
|
} else {
|
||||||
send_image_cache_request();
|
send_image_cache_request();
|
||||||
@ -172,6 +179,58 @@ int ImageRequest<I>::clip_request() {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename I>
|
||||||
|
void ImageRequest<I>::update_timestamp() {
|
||||||
|
bool modify = (get_aio_type() != AIO_TYPE_READ);
|
||||||
|
uint64_t update_interval;
|
||||||
|
if (modify) {
|
||||||
|
update_interval = m_image_ctx.mtime_update_interval;
|
||||||
|
} else {
|
||||||
|
update_interval = m_image_ctx.atime_update_interval;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (update_interval == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
utime_t (I::*get_timestamp_fn)() const;
|
||||||
|
void (I::*set_timestamp_fn)(utime_t);
|
||||||
|
if (modify) {
|
||||||
|
get_timestamp_fn = &I::get_modify_timestamp;
|
||||||
|
set_timestamp_fn = &I::set_modify_timestamp;
|
||||||
|
} else {
|
||||||
|
get_timestamp_fn = &I::get_access_timestamp;
|
||||||
|
set_timestamp_fn = &I::set_access_timestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
utime_t ts = ceph_clock_now();
|
||||||
|
{
|
||||||
|
RWLock::RLocker timestamp_locker(m_image_ctx.timestamp_lock);
|
||||||
|
if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
|
||||||
|
update_interval)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
RWLock::WLocker timestamp_locker(m_image_ctx.timestamp_lock);
|
||||||
|
bool update = should_update_timestamp(
|
||||||
|
ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
|
||||||
|
if (!update) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::invoke(set_timestamp_fn, m_image_ctx, ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO we fire and forget this outside the IO path to prevent
|
||||||
|
// potential race conditions with librbd client IO callbacks
|
||||||
|
// between different threads (e.g. librados and object cacher)
|
||||||
|
ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
|
||||||
|
auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
|
||||||
|
req->send();
|
||||||
|
}
|
||||||
|
|
||||||
template <typename I>
|
template <typename I>
|
||||||
ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
|
ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
|
||||||
Extents &&image_extents,
|
Extents &&image_extents,
|
||||||
@ -238,28 +297,7 @@ void ImageReadRequest<I>::send_request() {
|
|||||||
for (auto &object_extent : object_extents) {
|
for (auto &object_extent : object_extents) {
|
||||||
request_count += object_extent.second.size();
|
request_count += object_extent.second.size();
|
||||||
}
|
}
|
||||||
|
aio_comp->set_request_count(request_count);
|
||||||
utime_t ts = ceph_clock_now();
|
|
||||||
|
|
||||||
image_ctx.timestamp_lock.get_read();
|
|
||||||
if(should_update_timestamp(ts,image_ctx.get_access_timestamp(),
|
|
||||||
image_ctx.atime_update_interval)) {
|
|
||||||
image_ctx.timestamp_lock.put_read();
|
|
||||||
image_ctx.timestamp_lock.get_write();
|
|
||||||
if(should_update_timestamp(ts,image_ctx.get_access_timestamp(),
|
|
||||||
image_ctx.atime_update_interval)) {
|
|
||||||
aio_comp->set_request_count(request_count + 1);
|
|
||||||
auto update_ctx = new C_UpdateTimestamp<I>(&image_ctx, aio_comp, false);
|
|
||||||
update_ctx->send();
|
|
||||||
image_ctx.set_access_timestamp(ts);
|
|
||||||
} else {
|
|
||||||
aio_comp->set_request_count(request_count);
|
|
||||||
}
|
|
||||||
image_ctx.timestamp_lock.put_write();
|
|
||||||
} else {
|
|
||||||
image_ctx.timestamp_lock.put_read();
|
|
||||||
aio_comp->set_request_count(request_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
// issue the requests
|
// issue the requests
|
||||||
for (auto &object_extent : object_extents) {
|
for (auto &object_extent : object_extents) {
|
||||||
@ -346,34 +384,13 @@ void AbstractImageWriteRequest<I>::send_request() {
|
|||||||
|
|
||||||
if (!object_extents.empty()) {
|
if (!object_extents.empty()) {
|
||||||
uint64_t journal_tid = 0;
|
uint64_t journal_tid = 0;
|
||||||
|
|
||||||
utime_t ts = ceph_clock_now();
|
|
||||||
image_ctx.timestamp_lock.get_read();
|
|
||||||
if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(),
|
|
||||||
image_ctx.mtime_update_interval)) {
|
|
||||||
image_ctx.timestamp_lock.put_read();
|
|
||||||
image_ctx.timestamp_lock.get_write();
|
|
||||||
if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(),
|
|
||||||
image_ctx.mtime_update_interval)) {
|
|
||||||
aio_comp->set_request_count(object_extents.size() + 1);
|
|
||||||
auto update_ctx = new C_UpdateTimestamp<I>(&image_ctx, aio_comp, true);
|
|
||||||
update_ctx->send();
|
|
||||||
image_ctx.set_modify_timestamp(ts);
|
|
||||||
} else {
|
|
||||||
aio_comp->set_request_count(object_extents.size());
|
|
||||||
}
|
|
||||||
image_ctx.timestamp_lock.put_write();
|
|
||||||
} else {
|
|
||||||
image_ctx.timestamp_lock.put_read();
|
|
||||||
aio_comp->set_request_count(object_extents.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (journaling) {
|
if (journaling) {
|
||||||
// in-flight ops are flushed prior to closing the journal
|
// in-flight ops are flushed prior to closing the journal
|
||||||
ceph_assert(image_ctx.journal != NULL);
|
ceph_assert(image_ctx.journal != NULL);
|
||||||
journal_tid = append_journal_event(m_synchronous);
|
journal_tid = append_journal_event(m_synchronous);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
aio_comp->set_request_count(object_extents.size());
|
||||||
send_object_requests(object_extents, snapc, journal_tid);
|
send_object_requests(object_extents, snapc, journal_tid);
|
||||||
} else {
|
} else {
|
||||||
// no IO to perform -- fire completion
|
// no IO to perform -- fire completion
|
||||||
|
@ -84,6 +84,7 @@ protected:
|
|||||||
|
|
||||||
|
|
||||||
virtual int clip_request();
|
virtual int clip_request();
|
||||||
|
virtual void update_timestamp();
|
||||||
virtual void send_request() = 0;
|
virtual void send_request() = 0;
|
||||||
virtual void send_image_cache_request() = 0;
|
virtual void send_image_cache_request() = 0;
|
||||||
|
|
||||||
@ -248,6 +249,8 @@ protected:
|
|||||||
int clip_request() override {
|
int clip_request() override {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
void update_timestamp() override {
|
||||||
|
}
|
||||||
void send_request() override;
|
void send_request() override;
|
||||||
void send_image_cache_request() override;
|
void send_image_cache_request() override;
|
||||||
|
|
||||||
|
@ -82,8 +82,9 @@ struct TestMockIoImageRequest : public TestMockFixture {
|
|||||||
EXPECT_CALL(mock_image_ctx, get_modify_timestamp())
|
EXPECT_CALL(mock_image_ctx, get_modify_timestamp())
|
||||||
.WillOnce(Return(ceph_clock_now() - utime_t(10,0)));
|
.WillOnce(Return(ceph_clock_now() - utime_t(10,0)));
|
||||||
} else {
|
} else {
|
||||||
mock_image_ctx.mtime_update_interval = 0;
|
mock_image_ctx.mtime_update_interval = 600;
|
||||||
EXPECT_CALL(mock_image_ctx, get_modify_timestamp());
|
EXPECT_CALL(mock_image_ctx, get_modify_timestamp())
|
||||||
|
.WillOnce(Return(ceph_clock_now()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -226,8 +227,8 @@ TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) {
|
|||||||
mock_image_ctx.journal = &mock_journal;
|
mock_image_ctx.journal = &mock_journal;
|
||||||
|
|
||||||
InSequence seq;
|
InSequence seq;
|
||||||
expect_is_journal_appending(mock_journal, false);
|
|
||||||
expect_get_modify_timestamp(mock_image_ctx, false);
|
expect_get_modify_timestamp(mock_image_ctx, false);
|
||||||
|
expect_is_journal_appending(mock_journal, false);
|
||||||
expect_object_request_send(mock_image_ctx, 0);
|
expect_object_request_send(mock_image_ctx, 0);
|
||||||
|
|
||||||
C_SaferCond aio_comp_ctx;
|
C_SaferCond aio_comp_ctx;
|
||||||
@ -256,8 +257,8 @@ TEST_F(TestMockIoImageRequest, AioDiscardJournalAppendDisabled) {
|
|||||||
mock_image_ctx.journal = &mock_journal;
|
mock_image_ctx.journal = &mock_journal;
|
||||||
|
|
||||||
InSequence seq;
|
InSequence seq;
|
||||||
expect_is_journal_appending(mock_journal, false);
|
|
||||||
expect_get_modify_timestamp(mock_image_ctx, false);
|
expect_get_modify_timestamp(mock_image_ctx, false);
|
||||||
|
expect_is_journal_appending(mock_journal, false);
|
||||||
expect_object_request_send(mock_image_ctx, 0);
|
expect_object_request_send(mock_image_ctx, 0);
|
||||||
|
|
||||||
C_SaferCond aio_comp_ctx;
|
C_SaferCond aio_comp_ctx;
|
||||||
@ -314,8 +315,8 @@ TEST_F(TestMockIoImageRequest, AioWriteSameJournalAppendDisabled) {
|
|||||||
mock_image_ctx.journal = &mock_journal;
|
mock_image_ctx.journal = &mock_journal;
|
||||||
|
|
||||||
InSequence seq;
|
InSequence seq;
|
||||||
expect_is_journal_appending(mock_journal, false);
|
|
||||||
expect_get_modify_timestamp(mock_image_ctx, false);
|
expect_get_modify_timestamp(mock_image_ctx, false);
|
||||||
|
expect_is_journal_appending(mock_journal, false);
|
||||||
expect_object_request_send(mock_image_ctx, 0);
|
expect_object_request_send(mock_image_ctx, 0);
|
||||||
|
|
||||||
C_SaferCond aio_comp_ctx;
|
C_SaferCond aio_comp_ctx;
|
||||||
@ -345,8 +346,8 @@ TEST_F(TestMockIoImageRequest, AioCompareAndWriteJournalAppendDisabled) {
|
|||||||
mock_image_ctx.journal = &mock_journal;
|
mock_image_ctx.journal = &mock_journal;
|
||||||
|
|
||||||
InSequence seq;
|
InSequence seq;
|
||||||
expect_is_journal_appending(mock_journal, false);
|
|
||||||
expect_get_modify_timestamp(mock_image_ctx, false);
|
expect_get_modify_timestamp(mock_image_ctx, false);
|
||||||
|
expect_is_journal_appending(mock_journal, false);
|
||||||
expect_object_request_send(mock_image_ctx, 0);
|
expect_object_request_send(mock_image_ctx, 0);
|
||||||
|
|
||||||
C_SaferCond aio_comp_ctx;
|
C_SaferCond aio_comp_ctx;
|
||||||
|
Loading…
Reference in New Issue
Block a user