mirror of
https://github.com/ceph/ceph
synced 2025-02-22 02:27:29 +00:00
Merge pull request #9211 from dillaman/wip-15938
librbd: write-after-write might result in an inconsistent replicated image
Reviewed-by: Mykola Golub <mgolub@mirantis.com>
This commit is contained in:
commit
17b1c91774
@ -325,7 +325,8 @@ write_image()
|
||||
local duration=$(($RANDOM % 35 + 15))
|
||||
|
||||
timeout ${duration}s rbd --cluster ${cluster} -p ${POOL} bench-write \
|
||||
${image} --io-size 4096 --io-threads 8 --io-total 10G --io-pattern rand || true
|
||||
${image} --io-size 4096 --io-threads 8 --io-total 10G --io-pattern rand \
|
||||
--debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-bench-write.log || true
|
||||
}
|
||||
|
||||
create_snap()
|
||||
@ -334,7 +335,8 @@ create_snap()
|
||||
local image=$2
|
||||
local snap_name=$3
|
||||
|
||||
rbd --cluster ${cluster} -p ${POOL} snap create ${image}@${snap_name}
|
||||
rbd --cluster ${cluster} -p ${POOL} snap create ${image}@${snap_name} \
|
||||
--debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-snap-create.log
|
||||
}
|
||||
|
||||
wait_for_snap()
|
||||
|
@ -351,13 +351,13 @@ int JournalPlayer::process_playback(uint64_t object_number) {
|
||||
ldout(m_cct, 10) << __func__ << ": object_num=" << object_number << dendl;
|
||||
assert(m_lock.is_locked());
|
||||
|
||||
ObjectPlayerPtr object_player = get_object_player();
|
||||
if (verify_playback_ready()) {
|
||||
notify_entries_available();
|
||||
} else if (is_object_set_ready()) {
|
||||
if (m_watch_enabled) {
|
||||
schedule_watch();
|
||||
} else {
|
||||
ObjectPlayerPtr object_player = get_object_player();
|
||||
uint8_t splay_width = m_journal_metadata->get_splay_width();
|
||||
uint64_t active_set = m_journal_metadata->get_active_set();
|
||||
uint64_t object_set = object_player->get_object_number() / splay_width;
|
||||
|
@ -167,6 +167,27 @@ namespace librbd {
|
||||
}
|
||||
};
|
||||
|
||||
// Completion context that, when finished, reports a single extent of a
// journal IO event to the image's journal via commit_io_event_extent(),
// forwarding the completion code it was finished with.
struct C_CommitIOEventExtent : public Context {
|
||||
// image whose journal receives the commit notification
ImageCtx *image_ctx;
|
||||
// tid of the journal event the extent belongs to
uint64_t journal_tid;
|
||||
// start of the extent, passed through unchanged
uint64_t offset;
|
||||
// length of the extent, passed through unchanged
uint64_t length;
|
||||
|
||||
// Stores the four arguments in the same-named members; does no other work.
C_CommitIOEventExtent(ImageCtx *image_ctx, uint64_t journal_tid,
|
||||
uint64_t offset, uint64_t length)
|
||||
: image_ctx(image_ctx), journal_tid(journal_tid), offset(offset),
|
||||
length(length) {
|
||||
}
|
||||
|
||||
// Commits the stored extent against the stored journal tid.
// @param r completion code; handed to the journal as the last argument
//          without being inspected here.
virtual void finish(int r) {
|
||||
// all IO operations are flushed prior to closing the journal
|
||||
// (so the journal pointer is expected to still be set at this point)
assert(image_ctx->journal != nullptr);
|
||||
|
||||
image_ctx->journal->commit_io_event_extent(journal_tid, offset, length,
|
||||
r);
|
||||
}
|
||||
};
|
||||
|
||||
// Constructs the writeback handler: starts m_tid at 0 and records the
// caller-supplied lock and image context. The body is empty, so nothing
// else happens at construction time.
LibrbdWriteback::LibrbdWriteback(ImageCtx *ictx, Mutex& lock)
|
||||
: m_tid(0), m_lock(lock), m_ictx(ictx) {
|
||||
}
|
||||
@ -248,8 +269,8 @@ namespace librbd {
|
||||
assert(journal_tid == 0 || m_ictx->journal != NULL);
|
||||
if (journal_tid != 0) {
|
||||
m_ictx->journal->flush_event(
|
||||
journal_tid, new C_WriteJournalCommit(m_ictx, oid.name, object_no, off,
|
||||
bl, snapc, req_comp,
|
||||
journal_tid, new C_WriteJournalCommit(m_ictx, oid.name, object_no, off,
|
||||
bl, snapc, req_comp,
|
||||
journal_tid));
|
||||
} else {
|
||||
AioObjectWrite *req = new AioObjectWrite(m_ictx, oid.name, object_no,
|
||||
@ -262,22 +283,32 @@ namespace librbd {
|
||||
|
||||
// NOTE: this is a diff rendering whose +/- markers were lost, so lines from
// before and after the commit are interleaved. Per the hunk header
// (-262,22 +283,32) four lines are pre-change and fourteen are post-change;
// they are labelled below.
//
// Post-change behavior: maps the object range (oid, off, len) to file
// extents with Striper::extent_to_file() and, for each extent, commits it
// against original_journal_tid -- either immediately with result 0
// (new_journal_tid == 0) or from a C_CommitIOEventExtent callback handed to
// flush_event(new_journal_tid, ...).
//
// Preconditions asserted below: owner_lock is held, original_journal_tid is
// non-zero, and the image still has a journal.
void LibrbdWriteback::overwrite_extent(const object_t& oid, uint64_t off,
|
||||
uint64_t len,
|
||||
// [pre-change] single-tid signature, replaced by the two parameters below
ceph_tid_t journal_tid) {
|
||||
// [post-change] tid of the event previously attached to the extent
ceph_tid_t original_journal_tid,
|
||||
// [post-change] tid of the event replacing it; 0 selects the immediate path
ceph_tid_t new_journal_tid) {
|
||||
typedef std::vector<std::pair<uint64_t,uint64_t> > Extents;
|
||||
|
||||
assert(m_ictx->owner_lock.is_locked());
|
||||
// derive the object number from the object name and the image's prefix
uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix);
|
||||
|
||||
// all IO operations are flushed prior to closing the journal
|
||||
// [pre-change] assert on the old parameter name
assert(journal_tid != 0 && m_ictx->journal != NULL);
|
||||
// [post-change] same check against the renamed parameter
assert(original_journal_tid != 0 && m_ictx->journal != NULL);
|
||||
|
||||
// filled by extent_to_file(); iterated below as (first, second) pairs
Extents file_extents;
|
||||
Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, object_no, off,
|
||||
len, file_extents);
|
||||
for (Extents::iterator it = file_extents.begin();
|
||||
it != file_extents.end(); ++it) {
|
||||
// [pre-change] unconditional immediate commit (this line and the next)
m_ictx->journal->commit_io_event_extent(journal_tid, it->first,
|
||||
it->second, 0);
|
||||
// [post-change] everything from here to the closing brace of the else
if (new_journal_tid != 0) {
|
||||
// ensure new journal event is safely committed to disk before
|
||||
// committing old event
|
||||
// the heap-allocated context carries the old tid and this extent;
// presumably the journal completes (and frees) it once the flush is done
m_ictx->journal->flush_event(
|
||||
new_journal_tid, new C_CommitIOEventExtent(m_ictx,
|
||||
original_journal_tid,
|
||||
it->first, it->second));
|
||||
} else {
|
||||
// no replacement event: commit the old event's extent right away
m_ictx->journal->commit_io_event_extent(original_journal_tid, it->first,
|
||||
it->second, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,8 @@ namespace librbd {
|
||||
using WritebackHandler::write;
|
||||
|
||||
// Declaration of the overwrite_extent handler. This is a diff rendering with
// the +/- markers lost: per the hunk header (-42,7 +42,8) one parameter line
// is pre-change and two are post-change, as labelled below.
virtual void overwrite_extent(const object_t& oid, uint64_t off,
|
||||
// [pre-change] single journal tid
uint64_t len, ceph_tid_t journal_tid);
|
||||
// [post-change] original tid plus the new tid (this line and the next)
uint64_t len, ceph_tid_t original_journal_tid,
|
||||
ceph_tid_t new_journal_tid);
|
||||
|
||||
virtual void get_client_lock();
|
||||
virtual void put_client_lock();
|
||||
|
@ -462,7 +462,7 @@ void ObjectCacher::Object::replace_journal_tid(BufferHead *bh,
|
||||
if (bh_tid != 0 && bh_tid != tid) {
|
||||
// inform journal that it should not expect a writeback from this extent
|
||||
oc->writeback_handler.overwrite_extent(get_oid(), bh->start(),
|
||||
bh->length(), bh_tid);
|
||||
bh->length(), bh_tid, tid);
|
||||
}
|
||||
bh->set_journal_tid(tid);
|
||||
}
|
||||
|
@ -37,7 +37,8 @@ class WritebackHandler {
|
||||
ceph_tid_t journal_tid, Context *oncommit) = 0;
|
||||
|
||||
// Default overwrite_extent hook: the body is empty, so handlers that do not
// override it ignore the call. This is a diff rendering with the +/- markers
// lost: per the hunk header (-37,7 +37,8) one parameter line is pre-change
// and two are post-change, as labelled below.
virtual void overwrite_extent(const object_t& oid, uint64_t off, uint64_t len,
|
||||
// [pre-change] single journal tid
ceph_tid_t journal_tid) {}
|
||||
// [post-change] original tid plus the new tid (this line and the next)
ceph_tid_t original_journal_tid,
|
||||
ceph_tid_t new_journal_tid) {}
|
||||
|
||||
virtual bool can_scattered_write() { return false; }
|
||||
virtual ceph_tid_t write(const object_t& oid, const object_locator_t& oloc,
|
||||
|
Loading…
Reference in New Issue
Block a user