librbd: keep access/modified timestamp updates out of IO path

If the cache is enabled, it was possible for the timestamp updates
to cause race conditions with librbd clients (e.g. rbd export)
that had data corrupting effects since callbacks were expected to
be serialized.

Fixes: http://tracker.ceph.com/issues/37745
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
This commit is contained in:
Jason Dillaman 2019-01-09 14:46:49 -05:00
parent d0af9eaa73
commit 5f3cb951c5
3 changed files with 83 additions and 62 deletions

View File

@ -9,6 +9,7 @@
#include "librbd/Utils.h" #include "librbd/Utils.h"
#include "librbd/cache/ImageCache.h" #include "librbd/cache/ImageCache.h"
#include "librbd/io/AioCompletion.h" #include "librbd/io/AioCompletion.h"
#include "librbd/io/AsyncOperation.h"
#include "librbd/io/ObjectDispatchInterface.h" #include "librbd/io/ObjectDispatchInterface.h"
#include "librbd/io/ObjectDispatchSpec.h" #include "librbd/io/ObjectDispatchSpec.h"
#include "librbd/io/ObjectDispatcher.h" #include "librbd/io/ObjectDispatcher.h"
@ -18,6 +19,7 @@
#include "common/perf_counters.h" #include "common/perf_counters.h"
#include "common/WorkQueue.h" #include "common/WorkQueue.h"
#include "osdc/Striper.h" #include "osdc/Striper.h"
#include <functional>
#define dout_subsys ceph_subsys_rbd #define dout_subsys ceph_subsys_rbd
#undef dout_prefix #undef dout_prefix
@ -34,37 +36,41 @@ namespace {
template <typename I> template <typename I>
struct C_UpdateTimestamp : public Context { struct C_UpdateTimestamp : public Context {
public: public:
I* m_image_ctx; I& m_image_ctx;
AioCompletion* m_aio_comp; bool m_modify; // if modify set to 'true', modify timestamp is updated,
bool modify; //if modify set to 'true', modify timestamp is updated, access timestamp otherwise // access timestamp otherwise
AsyncOperation m_async_op;
C_UpdateTimestamp(I* ictx, AioCompletion *c, bool m) C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
: m_image_ctx(ictx), m_aio_comp(c), modify(m) { m_async_op.start_op(*get_image_ctx(&m_image_ctx));
m_aio_comp->add_request(); }
~C_UpdateTimestamp() override {
m_async_op.finish_op();
} }
void send() { void send() {
librados::ObjectWriteOperation op; librados::ObjectWriteOperation op;
if(modify) if (m_modify) {
cls_client::set_modify_timestamp(&op); cls_client::set_modify_timestamp(&op);
else } else {
cls_client::set_access_timestamp(&op); cls_client::set_access_timestamp(&op);
}
librados::AioCompletion *comp = librbd::util::create_rados_callback(this); auto comp = librbd::util::create_rados_callback(this);
int r = m_image_ctx->md_ctx.aio_operate(m_image_ctx->header_oid, comp, &op); int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
ceph_assert(r == 0); ceph_assert(r == 0);
comp->release(); comp->release();
} }
void finish(int r) override { void finish(int r) override {
// ignore errors updating timestamp // ignore errors updating timestamp
m_aio_comp->complete_request(0);
} }
}; };
bool should_update_timestamp(const utime_t& now, const utime_t& timestamp, bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
uint64_t interval) { uint64_t interval) {
return (interval && (static_cast<uint64_t>(now.sec()) >= interval + timestamp)); return (interval &&
(static_cast<uint64_t>(now.sec()) >= interval + timestamp));
} }
} // anonymous namespace } // anonymous namespace
@ -151,6 +157,7 @@ void ImageRequest<I>::send() {
} }
if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) { if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) {
update_timestamp();
send_request(); send_request();
} else { } else {
send_image_cache_request(); send_image_cache_request();
@ -172,6 +179,58 @@ int ImageRequest<I>::clip_request() {
return 0; return 0;
} }
template <typename I>
void ImageRequest<I>::update_timestamp() {
bool modify = (get_aio_type() != AIO_TYPE_READ);
uint64_t update_interval;
if (modify) {
update_interval = m_image_ctx.mtime_update_interval;
} else {
update_interval = m_image_ctx.atime_update_interval;
}
if (update_interval == 0) {
return;
}
utime_t (I::*get_timestamp_fn)() const;
void (I::*set_timestamp_fn)(utime_t);
if (modify) {
get_timestamp_fn = &I::get_modify_timestamp;
set_timestamp_fn = &I::set_modify_timestamp;
} else {
get_timestamp_fn = &I::get_access_timestamp;
set_timestamp_fn = &I::set_access_timestamp;
}
utime_t ts = ceph_clock_now();
{
RWLock::RLocker timestamp_locker(m_image_ctx.timestamp_lock);
if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
update_interval)) {
return;
}
}
{
RWLock::WLocker timestamp_locker(m_image_ctx.timestamp_lock);
bool update = should_update_timestamp(
ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
if (!update) {
return;
}
std::invoke(set_timestamp_fn, m_image_ctx, ts);
}
// TODO we fire and forget this outside the IO path to prevent
// potential race conditions with librbd client IO callbacks
// between different threads (e.g. librados and object cacher)
ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
req->send();
}
template <typename I> template <typename I>
ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
Extents &&image_extents, Extents &&image_extents,
@ -238,28 +297,7 @@ void ImageReadRequest<I>::send_request() {
for (auto &object_extent : object_extents) { for (auto &object_extent : object_extents) {
request_count += object_extent.second.size(); request_count += object_extent.second.size();
} }
aio_comp->set_request_count(request_count);
utime_t ts = ceph_clock_now();
image_ctx.timestamp_lock.get_read();
if(should_update_timestamp(ts,image_ctx.get_access_timestamp(),
image_ctx.atime_update_interval)) {
image_ctx.timestamp_lock.put_read();
image_ctx.timestamp_lock.get_write();
if(should_update_timestamp(ts,image_ctx.get_access_timestamp(),
image_ctx.atime_update_interval)) {
aio_comp->set_request_count(request_count + 1);
auto update_ctx = new C_UpdateTimestamp<I>(&image_ctx, aio_comp, false);
update_ctx->send();
image_ctx.set_access_timestamp(ts);
} else {
aio_comp->set_request_count(request_count);
}
image_ctx.timestamp_lock.put_write();
} else {
image_ctx.timestamp_lock.put_read();
aio_comp->set_request_count(request_count);
}
// issue the requests // issue the requests
for (auto &object_extent : object_extents) { for (auto &object_extent : object_extents) {
@ -346,34 +384,13 @@ void AbstractImageWriteRequest<I>::send_request() {
if (!object_extents.empty()) { if (!object_extents.empty()) {
uint64_t journal_tid = 0; uint64_t journal_tid = 0;
utime_t ts = ceph_clock_now();
image_ctx.timestamp_lock.get_read();
if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(),
image_ctx.mtime_update_interval)) {
image_ctx.timestamp_lock.put_read();
image_ctx.timestamp_lock.get_write();
if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(),
image_ctx.mtime_update_interval)) {
aio_comp->set_request_count(object_extents.size() + 1);
auto update_ctx = new C_UpdateTimestamp<I>(&image_ctx, aio_comp, true);
update_ctx->send();
image_ctx.set_modify_timestamp(ts);
} else {
aio_comp->set_request_count(object_extents.size());
}
image_ctx.timestamp_lock.put_write();
} else {
image_ctx.timestamp_lock.put_read();
aio_comp->set_request_count(object_extents.size());
}
if (journaling) { if (journaling) {
// in-flight ops are flushed prior to closing the journal // in-flight ops are flushed prior to closing the journal
ceph_assert(image_ctx.journal != NULL); ceph_assert(image_ctx.journal != NULL);
journal_tid = append_journal_event(m_synchronous); journal_tid = append_journal_event(m_synchronous);
} }
aio_comp->set_request_count(object_extents.size());
send_object_requests(object_extents, snapc, journal_tid); send_object_requests(object_extents, snapc, journal_tid);
} else { } else {
// no IO to perform -- fire completion // no IO to perform -- fire completion

View File

@ -84,6 +84,7 @@ protected:
virtual int clip_request(); virtual int clip_request();
virtual void update_timestamp();
virtual void send_request() = 0; virtual void send_request() = 0;
virtual void send_image_cache_request() = 0; virtual void send_image_cache_request() = 0;
@ -248,6 +249,8 @@ protected:
int clip_request() override { int clip_request() override {
return 0; return 0;
} }
void update_timestamp() override {
}
void send_request() override; void send_request() override;
void send_image_cache_request() override; void send_image_cache_request() override;

View File

@ -82,8 +82,9 @@ struct TestMockIoImageRequest : public TestMockFixture {
EXPECT_CALL(mock_image_ctx, get_modify_timestamp()) EXPECT_CALL(mock_image_ctx, get_modify_timestamp())
.WillOnce(Return(ceph_clock_now() - utime_t(10,0))); .WillOnce(Return(ceph_clock_now() - utime_t(10,0)));
} else { } else {
mock_image_ctx.mtime_update_interval = 0; mock_image_ctx.mtime_update_interval = 600;
EXPECT_CALL(mock_image_ctx, get_modify_timestamp()); EXPECT_CALL(mock_image_ctx, get_modify_timestamp())
.WillOnce(Return(ceph_clock_now()));
} }
} }
@ -226,8 +227,8 @@ TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) {
mock_image_ctx.journal = &mock_journal; mock_image_ctx.journal = &mock_journal;
InSequence seq; InSequence seq;
expect_is_journal_appending(mock_journal, false);
expect_get_modify_timestamp(mock_image_ctx, false); expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, false);
expect_object_request_send(mock_image_ctx, 0); expect_object_request_send(mock_image_ctx, 0);
C_SaferCond aio_comp_ctx; C_SaferCond aio_comp_ctx;
@ -256,8 +257,8 @@ TEST_F(TestMockIoImageRequest, AioDiscardJournalAppendDisabled) {
mock_image_ctx.journal = &mock_journal; mock_image_ctx.journal = &mock_journal;
InSequence seq; InSequence seq;
expect_is_journal_appending(mock_journal, false);
expect_get_modify_timestamp(mock_image_ctx, false); expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, false);
expect_object_request_send(mock_image_ctx, 0); expect_object_request_send(mock_image_ctx, 0);
C_SaferCond aio_comp_ctx; C_SaferCond aio_comp_ctx;
@ -314,8 +315,8 @@ TEST_F(TestMockIoImageRequest, AioWriteSameJournalAppendDisabled) {
mock_image_ctx.journal = &mock_journal; mock_image_ctx.journal = &mock_journal;
InSequence seq; InSequence seq;
expect_is_journal_appending(mock_journal, false);
expect_get_modify_timestamp(mock_image_ctx, false); expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, false);
expect_object_request_send(mock_image_ctx, 0); expect_object_request_send(mock_image_ctx, 0);
C_SaferCond aio_comp_ctx; C_SaferCond aio_comp_ctx;
@ -345,8 +346,8 @@ TEST_F(TestMockIoImageRequest, AioCompareAndWriteJournalAppendDisabled) {
mock_image_ctx.journal = &mock_journal; mock_image_ctx.journal = &mock_journal;
InSequence seq; InSequence seq;
expect_is_journal_appending(mock_journal, false);
expect_get_modify_timestamp(mock_image_ctx, false); expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, false);
expect_object_request_send(mock_image_ctx, 0); expect_object_request_send(mock_image_ctx, 0);
C_SaferCond aio_comp_ctx; C_SaferCond aio_comp_ctx;